Example #1
0
    def member_remove(self, lb_id, pool_id, member_id):
        """Delete a member from Neutron lbaas pool.

        :param lb_id: The ID of the loadbalancer the operation is targeted at;
        :param pool_id: The ID of the pool from which the member is deleted;
        :param member_id: The ID of the LB member.
        :returns: True if the operation succeeded, or None if errors occurred.
        """
        try:
            # FIXME(Yanyan Hu): Currently, the Neutron lbaasv2 service cannot
            # handle concurrent lb member operations well: a new member
            # creation/deletion request fails immediately instead of being
            # queued up while another operation is still in progress. As a
            # workaround, the loadbalancer status is checked before the
            # member deletion request is sent out. If the loadbalancer stays
            # unready until the wait times out, an exception is raised to
            # fail member_remove.
            res = self._wait_for_lb_ready(lb_id)
            if not res:
                msg = _LE('Loadbalancer %s is not ready.') % lb_id
                raise exception.Error(msg)
            self.nc().pool_member_delete(pool_id, member_id)
        except (exception.InternalError, exception.Error) as ex:
            msg = _LE('Failed in removing member %(m)s from pool %(p)s: '
                      '%(ex)s') % {'m': member_id, 'p': pool_id,
                                   'ex': six.text_type(ex)}
            LOG.exception(msg)
            return None
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            LOG.error(_LE('Failed in deleting pool member (%s).') % member_id)
            return None

        return True
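The workaround above (and the member_add variants later in this listing) leans on a _wait_for_lb_ready helper that polls the load balancer until it settles. That helper is not included here; the following is only a rough sketch of such a polling loop, in which the loadbalancer_get call, the provisioning_status attribute and its ACTIVE/ERROR values, the timeout and the poll interval are all assumptions made for illustration, not the project's actual implementation.

    def _wait_for_lb_ready(self, lb_id, ignore_not_found=False):
        """Sketch only: poll the LB until ACTIVE, ERROR or timeout.

        The client call, attribute names, timeout and poll interval below
        are assumptions; `import time` is assumed at module level.
        """
        waited = 0
        timeout = 600       # assumed overall timeout, in seconds
        interval = 10       # assumed poll interval, in seconds
        while waited < timeout:
            try:
                lb = self.nc().loadbalancer_get(lb_id)  # assumed client call
            except exception.InternalError:
                lb = None
            if lb is None:
                # Only treat a vanished LB as "ready" while deleting it.
                return ignore_not_found
            if lb.provisioning_status == 'ACTIVE':
                return True
            if lb.provisioning_status == 'ERROR':
                return False
            time.sleep(interval)
            waited += interval
        return False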
Example #2
0
    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        :returns: ``None``.
        """
        if action.action == consts.CLUSTER_SCALE_IN:
            expand = False
            # use action input directly if available
            count = action.inputs.get('count', None)
            if not count:
                # check if policy decisions available
                pd = action.data.get('deletion', None)
                count = pd.get('count', 1) if pd else 1
        else:
            # this is an action that inflates the cluster
            expand = True
            count = action.inputs.get('count', None)
            if not count:
                # check if policy decisions available
                pd = action.data.get('creation', None)
                count = pd.get('count', 1) if pd else 1

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        kc = self._keystone(cluster)

        regions_good = kc.validate_regions(self.regions.keys())
        if len(regions_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No region is found usable.')
            LOG.error(_LE('No region is found usable.'))
            return

        regions = {}
        for r, w in self.regions.items():
            if r in regions_good:
                regions[r] = w

        current_dist = cluster.get_region_distribution(regions_good)
        result = self._create_plan(current_dist, regions, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['regions'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['regions'] = result
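The actual placement arithmetic in the example above is hidden inside self._create_plan(current_dist, regions, count, expand), which is not part of this listing. The standalone function below is just one way a weight-proportional plan could be computed; its rounding rule, feasibility test and return shape are assumptions for illustration only, not the policy's real algorithm.

def _create_plan_sketch(current, weights, count, expand):
    """Illustrative planner: spread `count` nodes over weighted regions.

    current -- dict mapping region name to its existing node count
    weights -- dict mapping region name to an integer weight
    Returns a dict mapping region name to the number of nodes to create
    (or delete), or None when no feasible plan exists. This is a sketch,
    not Senlin's actual _create_plan.
    """
    total_weight = sum(weights.values())
    total_now = sum(current.get(r, 0) for r in weights)
    total_final = total_now + count if expand else total_now - count
    if total_weight == 0 or total_final < 0:
        return None

    # Desired final count per region, proportional to its weight; the
    # rounding remainder goes to the heaviest regions.
    desired = {r: total_final * w // total_weight for r, w in weights.items()}
    leftover = total_final - sum(desired.values())
    for r in sorted(weights, key=weights.get, reverse=True)[:leftover]:
        desired[r] += 1

    plan = {}
    for r in weights:
        delta = desired[r] - current.get(r, 0)
        if expand and delta > 0:
            plan[r] = delta
        elif not expand and delta < 0:
            plan[r] = -delta

    # Feasible only when the per-region deltas add up to the request.
    return plan if sum(plan.values()) == count else None

# e.g. _create_plan_sketch({'R1': 4, 'R2': 2}, {'R1': 2, 'R2': 1}, 3, True)
# would yield {'R1': 2, 'R2': 1}.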
Example #3
0
    def __call__(self, request):
        """WSGI method that controls (de)serialization and method dispatch."""
        action_args = self.get_action_args(request.environ)
        action = action_args.pop('action', None)

        try:
            deserialized_request = self.dispatch(self.deserializer, action,
                                                 request)
            action_args.update(deserialized_request)

            LOG.debug('Calling %(controller)s : %(action)s', {
                'controller': self.controller,
                'action': action
            })

            action_result = self.dispatch(self.controller, action, request,
                                          **action_args)
        except TypeError as err:
            LOG.error(_LE('Exception handling resource: %s') % err)
            msg = _('The server could not comply with the request since '
                    'it is either malformed or otherwise incorrect.')
            err = webob.exc.HTTPBadRequest(msg)
            http_exc = translate_exception(err, request.best_match_language())
            # NOTE(luisg): We disguise HTTP exceptions, otherwise they will be
            # treated by wsgi as responses ready to be sent back and they
            # won't make it into the pipeline app that serializes errors
            raise exception.HTTPExceptionDisguise(http_exc)
        except webob.exc.HTTPException as err:
            if not isinstance(err, webob.exc.HTTPError):
                # Some HTTPException are actually not errors, they are
                # responses ready to be sent back to the users, so we don't
                # create error log, but disguise and translate them to meet
                # openstacksdk's need.
                http_exc = translate_exception(err,
                                               request.best_match_language())
                raise exception.HTTPExceptionDisguise(http_exc)
            if isinstance(err, webob.exc.HTTPServerError):
                LOG.error(_LE("Returning %(code)s to user: %(explanation)s"), {
                    'code': err.code,
                    'explanation': err.explanation
                })
            http_exc = translate_exception(err, request.best_match_language())
            raise exception.HTTPExceptionDisguise(http_exc)
        except exception.SenlinException as err:
            raise translate_exception(err, request.best_match_language())
        except Exception as err:
            log_exception(err, sys.exc_info())
            raise translate_exception(err, request.best_match_language())

        serializer = self.serializer or serializers.JSONResponseSerializer()
        try:
            response = webob.Response(request=request)
            self.dispatch(serializer, action, response, action_result)
            return response

        # return unserializable result (typically an exception)
        except Exception:
            return action_result
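The __call__ method above (repeated later in this listing) routes each request through self.dispatch(), first for the deserializer, then the controller, then the serializer. Its implementation is not shown here; a plausible sketch follows, where the fall-back to a 'default' method is an assumption made for illustration.

    def dispatch(self, obj, action, *args, **kwargs):
        """Sketch: invoke the method named by `action` on `obj`, falling
        back to a 'default' method when it does not exist (assumed).
        """
        try:
            method = getattr(obj, action)
        except AttributeError:
            method = getattr(obj, 'default')
        return method(*args, **kwargs)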
Example #4
0
    def member_add(self, node, lb_id, pool_id, port, subnet):
        """Add a member to Neutron lbaas pool.

        :param node: A node object to be added to the specified pool.
        :param lb_id: The ID of the loadbalancer.
        :param pool_id: The ID of the pool for receiving the node.
        :param port: The port for the new LB member to be created.
        :param subnet: The subnet to be used by the new LB member.
        :returns: The ID of the new LB member or None if errors occurred.
        """
        try:
            subnet_obj = self.nc().subnet_get(subnet)
            net_id = subnet_obj.network_id
            net = self.nc().network_get(net_id)
        except exception.InternalError as ex:
            resource = 'subnet' if subnet in ex.message else 'network'
            msg = _LE('Failed in getting %(resource)s: %(msg)s.'
                      ) % {'resource': resource, 'msg': six.text_type(ex)}
            LOG.exception(msg)
            return None
        net_name = net.name

        node_detail = node.get_details(oslo_context.get_current())
        addresses = node_detail.get('addresses')
        if net_name not in addresses:
            msg = _LE('Node is not in subnet %(subnet)s')
            LOG.error(msg, {'subnet': subnet})
            return None

        # Use the first IP address if more than one is found in the
        # target network.
        address = addresses[net_name][0]['addr']
        try:
            # FIXME(Yanyan Hu): Currently, the Neutron lbaasv2 service cannot
            # handle concurrent lb member operations well: a new member
            # creation/deletion request fails immediately instead of being
            # queued up while another operation is still in progress. As a
            # workaround, the loadbalancer status is checked before the
            # member creation request is sent out. If the loadbalancer stays
            # unready until the wait times out, an exception is raised to
            # fail member_add.
            res = self._wait_for_lb_ready(lb_id)
            if not res:
                msg = _LE('Loadbalancer %s is not ready.') % lb_id
                raise exception.Error(msg)
            member = self.nc().pool_member_create(pool_id, address, port,
                                                  subnet_obj.id)
        except (exception.InternalError, exception.Error) as ex:
            msg = _LE('Failed in creating lb pool member: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return None
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            LOG.error(_LE('Failed in creating pool member (%s).') % member.id)
            return None

        return member.id
Example #5
0
    def __call__(self, request):
        """WSGI method that controls (de)serialization and method dispatch."""
        action_args = self.get_action_args(request.environ)
        action = action_args.pop('action', None)

        try:
            deserialized_request = self.dispatch(self.deserializer,
                                                 action, request)
            action_args.update(deserialized_request)

            LOG.debug('Calling %(controller)s : %(action)s',
                      {'controller': self.controller, 'action': action})

            action_result = self.dispatch(self.controller, action,
                                          request, **action_args)
        except TypeError as err:
            LOG.error(_LE('Exception handling resource: %s') % err)
            msg = _('The server could not comply with the request since '
                    'it is either malformed or otherwise incorrect.')
            err = webob.exc.HTTPBadRequest(msg)
            http_exc = translate_exception(err, request.best_match_language())
            # NOTE(luisg): We disguise HTTP exceptions, otherwise they will be
            # treated by wsgi as responses ready to be sent back and they
            # won't make it into the pipeline app that serializes errors
            raise exception.HTTPExceptionDisguise(http_exc)
        except webob.exc.HTTPException as err:
            if not isinstance(err, webob.exc.HTTPError):
                # Some HTTPException are actually not errors, they are
                # responses ready to be sent back to the users, so we don't
                # create error log, but disguise and translate them to meet
                # openstacksdk's need.
                http_exc = translate_exception(err,
                                               request.best_match_language())
                raise exception.HTTPExceptionDisguise(http_exc)
            if isinstance(err, webob.exc.HTTPServerError):
                LOG.error(
                    _LE("Returning %(code)s to user: %(explanation)s"),
                    {'code': err.code, 'explanation': err.explanation})
            http_exc = translate_exception(err, request.best_match_language())
            raise exception.HTTPExceptionDisguise(http_exc)
        except exception.SenlinException as err:
            raise translate_exception(err, request.best_match_language())
        except Exception as err:
            log_exception(err, sys.exc_info())
            raise translate_exception(err, request.best_match_language())

        serializer = self.serializer or serializers.JSONResponseSerializer()
        try:
            response = webob.Response(request=request)
            self.dispatch(serializer, action, response, action_result)
            return response

        # return unserializable result (typically an exception)
        except Exception:
            return action_result
Example #6
0
    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """

        count = action.inputs.get('count', None)
        if action.action == consts.CLUSTER_SCALE_IN:
            expand = False
            if not count:
                pd = action.data.get('deletion', None)
                count = pd.get('count', 1) if pd else 1
        else:
            expand = True
            if not count:
                pd = action.data.get('creation', None)
                count = pd.get('count', 1) if pd else 1

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        nc = self._nova(cluster)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No availability zone found available.')
            LOG.error(_LE('No availability zone found available.'))
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)

        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
Example #7
0
    def member_add(self, node, lb_id, pool_id, port, subnet):
        """Add a member to Neutron lbaas pool.

        :param node: A node object to be added to the specified pool.
        :param lb_id: The ID of the loadbalancer.
        :param pool_id: The ID of the pool for receiving the node.
        :param port: The port for the new LB member to be created.
        :param subnet: The subnet to be used by the new LB member.
        :returns: The ID of the new LB member or None if errors occurred.
        """
        addresses = self._get_node_address(node, version=4)
        if not addresses:
            LOG.error(_LE('Node (%(n)s) does not have valid IPv4 address.'),
                      {'n': node.id})
            return None

        try:
            subnet_obj = self.nc().subnet_get(subnet)
            net_id = subnet_obj.network_id
            net = self.nc().network_get(net_id)
        except exception.InternalError as ex:
            resource = 'subnet' if subnet in ex.message else 'network'
            msg = _LE('Failed in getting %(resource)s: %(msg)s.'
                      ) % {'resource': resource, 'msg': six.text_type(ex)}
            LOG.exception(msg)
            event.warning(oslo_context.get_current(), self,
                          resource.upper() + '_GET', 'ERROR', msg)
            return None
        net_name = net.name

        if net_name not in addresses:
            LOG.error(_LE('Node is not in subnet %(subnet)s'),
                      {'subnet': subnet})
            return None

        address = addresses[net_name]
        try:
            member = self.nc().pool_member_create(pool_id, address, port,
                                                  subnet_obj.id)
        except exception.InternalError as ex:
            msg = _LE('Failed in creating lb pool member: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            event.warning(oslo_context.get_current(), self,
                          'POOL_MEMBER_CREATE', 'ERROR', msg)
            return None
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            LOG.error(_LE('Failed in creating pool member (%s).') % member.id)
            return None

        return member.id
Example #8
0
    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        :returns: ``None``.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        kc = self._keystone(cluster)

        regions_good = kc.validate_regions(self.regions.keys())
        if len(regions_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No region is found usable.')
            LOG.error(_LE('No region is found usable.'))
            return

        regions = {}
        for r, w in self.regions.items():
            if r in regions_good:
                regions[r] = w

        current_dist = cluster.get_region_distribution(regions_good)
        result = self._create_plan(current_dist, regions, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['regions'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['regions'] = result
Example #9
0
    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        :returns: ``None``.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cm.Cluster.load(action.context, cluster_id)

        kc = self._keystone(cluster)

        regions_good = kc.validate_regions(self.regions.keys())
        if len(regions_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No region is found usable.')
            LOG.error(_LE('No region is found usable.'))
            return

        regions = {}
        for r, w in self.regions.items():
            if r in regions_good:
                regions[r] = w

        current_dist = cluster.get_region_distribution(regions_good)
        result = self._create_plan(current_dist, regions, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['regions'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['regions'] = result
Example #10
0
    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        nc = self._nova(cluster)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No availability zone found available.')
            LOG.error(_LE('No availability zone found available.'))
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)

        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
Example #11
0
    def attach(self, cluster):
        """Routine to be invoked when policy is to be attached to a cluster.

        :param cluster: The target cluster the policy is attached to.
        :returns: When the operation was successful, returns a tuple (True,
                  message); otherwise, returns a tuple (False, error).
        """
        data = {}
        nv_client = self.nova(cluster)

        placement_group = self.properties.get(self.PLACEMENT_GROUP)
        group_name = placement_group.get('group_name', None)

        if group_name is None:
            profile = profile_base.Profile.load(
                oslo_context.get_current(), cluster.profile_id)
            if 'scheduler_hints' in profile.spec:
                hints = profile.spec['scheduler_hints']
                group_name = hints.get('group', None)

        if group_name is not None:
            # to add into nova driver
            try:
                server_group = nv_client.get_server_group(group_name)
            except exception.InternalError as ex:
                msg = 'Failed in searching server_group'
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg
            data['group_id'] = server_group.id
            data['inherited_group'] = True

        if data.get('group_id') is None:
            # to add into nova driver
            rule = placement_group.get('placement_rule', 'anti-affinity')

            try:
                server_group = nv_client.create_server_group(rule)
            except exception.InternalError as ex:
                msg = 'Failed in creating server_group'
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg
            data['group_id'] = server_group.id
            data['inherited_group'] = False

        policy_data = self._build_policy_data(data)

        return True, policy_data
Example #12
0
def ActionProc(context, action_id):
    '''Action process.'''

    # Step 1: materialize the action object
    action = Action.load(context, action_id=action_id)
    if action is None:
        LOG.error(_LE('Action "%s" could not be found.'), action_id)
        return False

    # TODO(Anyone): Remove context usage in event module
    EVENT.info(action.context, action, action.action, 'START')

    reason = 'Action completed'
    success = True
    try:
        # Step 2: execute the action
        result, reason = action.execute()
    except Exception as ex:
        # We catch exception here to make sure the following logics are
        # executed.
        result = action.RES_ERROR
        reason = six.text_type(ex)
        LOG.exception(_('Unexpected exception occurred during action '
                        '%(action)s (%(id)s) execution: %(reason)s'),
                      {'action': action.action, 'id': action.id,
                       'reason': reason})
        success = False
    finally:
        # NOTE: locks on action is eventually released here by status update
        action.set_status(result, reason)

    return success
Example #13
0
    def do_recover(self, obj, **options):
        """Default recover operation.

        :param obj: The node object to operate on.
        :param options: Keyword arguments for the recover operation.
        """
        operation = options.pop('operation', None)
        # TODO(Qiming): The operation input could be a list of operations.
        if operation and not isinstance(operation, six.string_types):
            operation = operation[0]

        if operation and operation != consts.RECOVER_RECREATE:
            LOG.error(_LE("Recover operation not supported: %s"), operation)
            return False

        try:
            self.do_delete(obj, **options)
        except exc.EResourceDeletion as ex:
            raise exc.EResourceOperation(op='recovering',
                                         type='node',
                                         id=obj.id,
                                         message=six.text_type(ex))
        res = None
        try:
            res = self.do_create(obj)
        except exc.EResourceCreation as ex:
            raise exc.EResourceOperation(op='recovering',
                                         type='node',
                                         id=obj.id,
                                         message=six.text_type(ex))
        return res
Example #14
0
        def hup(*args):
            # Shuts down the server(s), but allows running requests to complete

            self.LOG.error(_LE('SIGHUP received'))
            signal.signal(signal.SIGHUP, signal.SIG_IGN)
            os.killpg(0, signal.SIGHUP)
            signal.signal(signal.SIGHUP, hup)
Example #15
0
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False):
    """Try to lock the specified node.

    :param context: the context used for DB operations;
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param engine: ID of the engine that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Step 1: try lock the node - if the returned owner_id is the
    #         action id, it was a success
    owner = db_api.node_lock_acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for node %s again' % node_id)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if action_id == owner:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    # Step 4: check whether the node is locked by a dead engine; if so,
    # steal the lock.
    action = db_api.action_get(context, owner)
    if (action and action.owner and action.owner != engine
            and is_engine_dead(context, action.owner)):
        LOG.info(
            _LI('The node %(n)s is locked by dead action %(a)s, '
                'try to steal the lock.'), {
                    'n': node_id,
                    'a': owner
                })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context,
                                  action.id,
                                  time.time(),
                                  reason=reason)
        db_api.node_lock_steal(node_id, action_id)
        return True

    LOG.error(
        _LE('Node is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock'), {
                'old': owner,
                'new': action_id
            })

    return False
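The stealing branch above only triggers when is_engine_dead() reports the lock owner's engine as gone. That helper is not part of this listing; one way it could work is to compare the engine's service heartbeat record with a liveness window, as in the sketch below. The service_get call, the updated_at field and the 2 * periodic_interval window are all assumptions for illustration.

def is_engine_dead_sketch(ctx, engine_id, duration=None):
    """Sketch: an engine is presumed dead when its service record has not
    been refreshed within the liveness window. Names are assumptions.
    """
    if duration is None:
        duration = 2 * cfg.CONF.periodic_interval   # assumed config option
    service = db_api.service_get(ctx, engine_id)    # assumed DB helper
    if not service:
        return True
    return timeutils.is_older_than(service.updated_at, duration)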
Example #16
0
    def detach(self, cluster):
        """Routine to be called when the policy is detached from a cluster.

        :param cluster: The cluster from which the policy is to be detached.
        :returns: When the operation was successful, returns a tuple of
                  (True, data) where the data contains references to the
                  resources created; otherwise returns a tuple of (False,
                  error) where error contains an error message.
        """

        reason = _('Servergroup resource deletion succeeded.')

        ctx = context.get_admin_context()
        binding = db_api.cluster_policy_get(ctx, cluster.id, self.id)
        if not binding or not binding.data:
            return True, reason

        policy_data = self._extract_policy_data(binding.data)
        if not policy_data:
            return True, reason

        group_id = policy_data.get('servergroup_id', None)
        inherited_group = policy_data.get('inherited_group', False)

        if group_id and not inherited_group:
            try:
                self.nova(cluster).delete_server_group(group_id)
            except Exception as ex:
                msg = _('Failed in deleting servergroup.')
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg

        return True, reason
Example #17
0
    def execute(self, **kwargs):
        '''Wrapper of action execution.

        This is mainly a wrapper that executes an action with the cluster
        lock acquired.

        :returns: A tuple (res, reason) indicating whether the execution
                  succeeded and, if not, why.
        '''

        try:
            cluster = cluster_mod.Cluster.load(self.context, self.target)
        except exception.NotFound:
            reason = _('Cluster %(id)s not found') % {'id': self.target}
            LOG.error(reason)
            return self.RES_ERROR, reason

        # Try to lock the cluster before doing the real operation
        forced = (self.action == self.CLUSTER_DELETE)
        res = senlin_lock.cluster_lock_acquire(cluster.id, self.id,
                                               senlin_lock.CLUSTER_SCOPE,
                                               forced)
        if not res:
            return self.RES_ERROR, _('Failed locking cluster')

        try:
            res, reason = self._execute(cluster)
        finally:
            senlin_lock.cluster_lock_release(cluster.id, self.id,
                                             senlin_lock.CLUSTER_SCOPE)

        return res, reason
Example #18
0
    def detach(self, cluster):
        """Routine to be called when the policy is detached from a cluster.

        :param cluster: The cluster from which the policy is to be detached.
        :returns: When the operation was successful, returns a tuple of
                  (True, data) where the data contains references to the
                  resources created; otherwise returns a tuple of (False,
                  error) where error contains an error message.
        """

        reason = _('Servergroup resource deletion succeeded.')

        ctx = context.get_admin_context()
        binding = cpo.ClusterPolicy.get(ctx, cluster.id, self.id)
        if not binding or not binding.data:
            return True, reason

        policy_data = self._extract_policy_data(binding.data)
        if not policy_data:
            return True, reason

        group_id = policy_data.get('servergroup_id', None)
        inherited_group = policy_data.get('inherited_group', False)

        if group_id and not inherited_group:
            try:
                self.nova(cluster).delete_server_group(group_id)
            except Exception as ex:
                msg = _('Failed in deleting servergroup.')
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg

        return True, reason
Example #19
0
def node_lock_acquire(node_id, action_id, forced=False):
    '''Try to lock the specified node.

    :param forced: set to True to cancel the current action that
                   owns the lock, if any.
    '''
    # Step 1: try lock the node - if the returned owner_id is the
    #         action id, it was a success
    owner = db_api.node_lock_acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if action_id == owner:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    LOG.error(_LE('Node is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock') % {
                      'old': owner, 'new': action_id})

    return False
Example #20
0
    def detach(self, cluster):
        """Routine to be called when the policy is detached from a cluster.

        :param cluster: The cluster from which the policy is to be detached.
        :returns: When the operation was successful, returns a tuple of
                  (True, data) where the data contains references to the
                  resources created; otherwise returns a tuple of (False,
                  error) where error contains an error message.
        """

        reason = _('Server group resources deletion succeeded')

        cp = cluster_policy.ClusterPolicy.load(oslo_context.get_current(),
                                               cluster.id, self.id)
        if cp is None or cp.data is None:
            return True, reason

        policy_data = self._extract_policy_data(cp.data)
        if policy_data is None:
            return True, reason

        group_id = policy_data.get('group_id', None)
        inherited_group = policy_data.get('inherited_group', False)

        if group_id and not inherited_group:
            try:
                # to add into nova driver
                self.nova(cluster).delete_server_group(group_id)
            except exception.InternalError as ex:
                msg = 'Failed in deleting server_group'
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg

        return True, reason
Example #21
0
    def kill_children(self, *args):
        """Kills the entire process group."""

        LOG.error(_LE('SIGTERM received'))
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        self.running = False
        os.killpg(0, signal.SIGTERM)
Example #22
0
    def kill_children(self, *args):
        """Kills the entire process group."""

        LOG.error(_LE('SIGTERM received'))
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        self.running = False
        os.killpg(0, signal.SIGTERM)
Example #23
0
    def member_add(self, node, lb_id, pool_id, port, subnet):
        """Add a member to Neutron lbaas pool.

        :param node: A node object to be added to the specified pool.
        :param lb_id: The ID of the loadbalancer.
        :param pool_id: The ID of the pool for receiving the node.
        :param port: The port for the new LB member to be created.
        :param subnet: The subnet to be used by the new LB member.
        :returns: The ID of the new LB member or None if errors occurred.
        """
        try:
            subnet_obj = self.nc().subnet_get(subnet)
            net_id = subnet_obj.network_id
            net = self.nc().network_get(net_id)
        except exception.InternalError as ex:
            resource = 'subnet' if subnet in ex.message else 'network'
            msg = _LE('Failed in getting %(resource)s: %(msg)s.'
                      ) % {'resource': resource, 'msg': six.text_type(ex)}
            LOG.exception(msg)
            return None
        net_name = net.name

        node_detail = node.get_details(oslo_context.get_current())
        addresses = node_detail.get('addresses')
        if net_name not in addresses:
            LOG.error(_LE('Node is not in subnet %(subnet)s'),
                      {'subnet': subnet})
            return None

        # Use the first IP address if more than one is found in the
        # target network.
        address = addresses[net_name][0]['addr']
        try:
            member = self.nc().pool_member_create(pool_id, address, port,
                                                  subnet_obj.id)
        except exception.InternalError as ex:
            msg = _LE('Failed in creating lb pool member: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return None
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            LOG.error(_LE('Failed in creating pool member (%s).') % member.id)
            return None

        return member.id
Example #24
0
def cluster_lock_acquire(context, cluster_id, action_id, engine=None,
                         scope=CLUSTER_SCOPE, forced=False):
    """Try to lock the specified cluster.

    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action which wants to lock the cluster.
    :param engine: ID of the engine which wants to lock the cluster.
    :param scope: scope of lock, could be cluster wide lock, or node-wide
                  lock.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """

    # Step 1: try lock the cluster - if the returned owner_id is the
    #         action id, it was a success
    owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
    if action_id in owners:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for cluster %s again' % cluster_id)
        owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
        if action_id in owners:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    # Will reach here only because scope == CLUSTER_SCOPE
    action = db_api.action_get(context, owners[0])
    if (action and action.owner and action.owner != engine and
            is_engine_dead(context, action.owner)):
        LOG.info(_LI('The cluster %(c)s is locked by dead action %(a)s, '
                     'try to steal the lock.'), {
            'c': cluster_id,
            'a': owners[0]
        })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context, action.id, time.time(),
                                  reason=reason)
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    LOG.error(_LE('Cluster is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': str(owners), 'new': action_id})

    return False
Example #25
0
    def lb_delete(self, **kwargs):
        """Delete a Neutron lbaas instance

        The following Neutron lbaas resources will be deleted in order:
        1)healthmonitor; 2)pool; 3)listener; 4)loadbalancer.
        """
        lb_id = kwargs.pop('loadbalancer')

        healthmonitor_id = kwargs.pop('healthmonitor', None)
        if healthmonitor_id:
            try:
                self.nc().healthmonitor_delete(healthmonitor_id)
            except exception.InternalError as ex:
                msg = _LE('Failed in deleting healthmonitor: %s.'
                          ) % six.text_type(ex)
                LOG.exception(msg)
                return False, msg
            res = self._wait_for_lb_ready(lb_id)
            if res is False:
                msg = _LE('Failed in deleting healthmonitor '
                          '(%s).') % healthmonitor_id
                return False, msg

        pool_id = kwargs.pop('pool', None)
        if pool_id:
            try:
                self.nc().pool_delete(pool_id)
            except exception.InternalError as ex:
                msg = _LE('Failed in deleting lb pool: %s.'
                          ) % six.text_type(ex)
                LOG.exception(msg)
                return False, msg
            res = self._wait_for_lb_ready(lb_id)
            if res is False:
                msg = _LE('Failed in deleting pool (%s).') % pool_id
                return False, msg

        listener_id = kwargs.pop('listener', None)
        if listener_id:
            try:
                self.nc().listener_delete(listener_id)
            except exception.InternalError as ex:
                msg = _LE('Failed in deleting listener: %s.'
                          ) % six.text_type(ex)
                LOG.exception(msg)
                return False, msg
            res = self._wait_for_lb_ready(lb_id)
            if res is False:
                msg = _LE('Failed in deleting listener (%s).') % listener_id
                return False, msg

        self.nc().loadbalancer_delete(lb_id)
        res = self._wait_for_lb_ready(lb_id, ignore_not_found=True)
        if res is False:
            msg = _LE('Failed in deleting loadbalancer (%s).') % lb_id
            return False, msg

        return True, _('LB deletion succeeded')
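For reference, a hedged usage sketch of the deletion entry point above, assuming the driver instance and the resource IDs recorded at creation time are available under the illustrative names lb_driver and lb_data:

ok, reason = lb_driver.lb_delete(
    loadbalancer=lb_data['loadbalancer'],
    healthmonitor=lb_data.get('healthmonitor'),
    pool=lb_data.get('pool'),
    listener=lb_data.get('listener'))
if not ok:
    LOG.error(reason)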
Example #26
0
def node_lock_acquire(context, node_id, action_id, engine=None,
                      forced=False):
    """Try to lock the specified node.

    :param context: the context used for DB operations;
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param engine: ID of the engine that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Step 1: try lock the node - if the returned owner_id is the
    #         action id, it was a success
    owner = db_api.node_lock_acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for node %s again' % node_id)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if action_id == owner:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    # Step 4: check whether the node is locked by a dead engine; if so,
    # steal the lock.
    action = db_api.action_get(context, owner)
    if (action and action.owner and action.owner != engine and
            is_engine_dead(context, action.owner)):
        LOG.info(_LI('The node %(n)s is locked by dead action %(a)s, '
                     'try to steal the lock.'), {
            'n': node_id,
            'a': owner
        })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context, action.id, time.time(),
                                  reason=reason)
        db_api.node_lock_steal(node_id, action_id)
        return True

    LOG.error(_LE('Node is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': owner, 'new': action_id})

    return False
Example #27
0
    def do_create(self, context, **kwargs):
        '''Additional logic at the beginning of the cluster creation process.

        Set cluster status to CREATING.
        '''
        if self.status != self.INIT:
            LOG.error(_LE('Cluster is in status "%s"'), self.status)
            return False

        self.set_status(context, self.CREATING, reason='Creation in progress')
        return True
Example #28
0
    def do_create(self, context, **kwargs):
        '''Additional logic at the beginning of the cluster creation process.

        Set cluster status to CREATING.
        '''
        if self.status != self.INIT:
            LOG.error(_LE('Cluster is in status "%s"'), self.status)
            return False

        self.set_status(context, self.CREATING, reason='Creation in progress')
        return True
Example #29
0
def cluster_lock_acquire(context,
                         cluster_id,
                         action_id,
                         engine=None,
                         scope=CLUSTER_SCOPE,
                         forced=False):
    """Try to lock the specified cluster.

    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action which wants to lock the cluster.
    :param engine: ID of the engine which wants to lock the cluster.
    :param scope: scope of lock, could be cluster wide lock, or node-wide
                  lock.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """

    # Step 1: try lock the cluster - if the returned owner_id is the
    #         action id, it was a success
    owners = cl_obj.ClusterLock.acquire(cluster_id, action_id, scope)
    if action_id in owners:
        return True

    # Step 2: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owners = cl_obj.ClusterLock.steal(cluster_id, action_id)
        return action_id in owners

    # Step 3: check if the owner is a dead engine; if so, steal the lock.
    # Will reach here only because scope == CLUSTER_SCOPE
    action = ao.Action.get(context, owners[0])
    if (action and action.owner and action.owner != engine
            and utils.is_engine_dead(context, action.owner)):
        LOG.info(
            _LI('The cluster %(c)s is locked by dead action %(a)s, '
                'try to steal the lock.'), {
                    'c': cluster_id,
                    'a': owners[0]
                })
        reason = _('Engine died when executing this action.')
        owners = cl_obj.ClusterLock.steal(cluster_id, action_id)
        # Mark the old action to failed.
        ao.Action.mark_failed(context, action.id, time.time(), reason)
        return action_id in owners

    LOG.error(
        _LE('Cluster is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock'), {
                'old': str(owners),
                'new': action_id
            })

    return False
Example #30
0
def error(context, entity, action, status=None, status_reason=None,
          timestamp=None):
    timestamp = timestamp or timeutils.utcnow(True)
    event = Event(timestamp, logging.ERROR, entity,
                  action=action, status=status, status_reason=status_reason,
                  user=context.user, project=context.project)
    event.store(context)
    LOG.error(_LE('%(name)s [%(id)s] %(action)s - %(status)s: %(reason)s'),
              {'name': event.oname,
               'id': event.oid and event.oid[:8],
               'action': action,
               'status': status,
               'reason': status_reason})
Example #31
0
    def member_remove(self, lb_id, pool_id, member_id):
        """Delete a member from Neutron lbaas pool.

        :param lb_id: The ID of the loadbalancer the operation is targeted at;
        :param pool_id: The ID of the pool from which the member is deleted;
        :param member_id: The ID of the LB member.
        :returns: True if the operation succeeded, or None if errors occurred.
        """
        try:
            self.nc().pool_member_delete(pool_id, member_id)
        except exception.InternalError as ex:
            msg = _LE('Failed in removing member %(m)s from pool %(p)s: '
                      '%(ex)s') % {'m': member_id, 'p': pool_id,
                                   'ex': six.text_type(ex)}
            LOG.exception(msg)
            return None
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            LOG.error(_LE('Failed in deleting pool member (%s).') % member_id)
            return None

        return True
Example #32
0
    def _verify_and_respawn_children(self, pid, status):
        if len(self.stale_children) == 0:
            LOG.debug('No stale children')

        if os.WIFEXITED(status) and os.WEXITSTATUS(status) != 0:
            LOG.error(_LE('Not respawning child %d, cannot '
                          'recover from termination'), pid)
            if not self.children and not self.stale_children:
                LOG.info(_LI('All workers have terminated. Exiting'))
                self.running = False
        else:
            if len(self.children) < self.conf.workers:
                self.run_child()
Example #33
0
    def validate_for_update(self, new_profile):
        non_updatables = []
        for (k, v) in new_profile.properties.items():
            if self.properties.get(k, None) != v:
                if not self.properties_schema[k].updatable:
                    non_updatables.append(k)

        if not non_updatables:
            return True

        msg = ", ".join(non_updatables)
        LOG.error(_LE("The following properties are not updatable: %s.") % msg)
        return False
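To make the check above concrete, here is a small illustrative setup; the schema class is a simplified stand-in invented for this sketch, not Senlin's real schema types. Only properties whose schema entry is marked updatable may differ between the current and the new profile.

class _FakeSchemaEntry(object):
    """Stand-in for a real schema entry; only the updatable flag matters."""
    def __init__(self, updatable=False):
        self.updatable = updatable

properties_schema = {
    'flavor': _FakeSchemaEntry(updatable=False),
    'name': _FakeSchemaEntry(updatable=True),
}
# With current properties {'flavor': '1', 'name': 'web'}, a new profile
# carrying {'flavor': '2', 'name': 'web-2'} would be rejected because
# 'flavor' differs but is not updatable; changing only 'name' would pass.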
Example #34
0
    def validate_for_update(self, new_profile):
        non_updatables = []
        for (k, v) in new_profile.properties.items():
            if self.properties.get(k, None) != v:
                if not self.properties_schema[k].updatable:
                    non_updatables.append(k)

        if not non_updatables:
            return True

        msg = ", ".join(non_updatables)
        LOG.error(_LE("The following properties are not updatable: %s."
                      ) % msg)
        return False
Example #35
0
    def _verify_and_respawn_children(self, pid, status):
        if len(self.stale_children) == 0:
            LOG.debug('No stale children')

        if os.WIFEXITED(status) and os.WEXITSTATUS(status) != 0:
            LOG.error(
                _LE('Not respawning child %d, cannot '
                    'recover from termination'), pid)
            if not self.children and not self.stale_children:
                LOG.info(_LI('All workers have terminated. Exiting'))
                self.running = False
        else:
            if len(self.children) < self.conf.workers:
                self.run_child()
Example #36
0
    def read_global_environment(self):
        '''Read and parse global environment files.'''

        cfg.CONF.import_opt('environment_dir', 'senlin.common.config')
        env_dir = cfg.CONF.environment_dir

        try:
            files = glob.glob(os.path.join(env_dir, '*'))
        except OSError as ex:
            LOG.error(_LE('Failed to read %s'), env_dir)
            LOG.exception(ex)
            return

        for fname in files:
            try:
                with open(fname) as f:
                    LOG.info(_LI('Loading environment from %s'), fname)
                    self.load(self.parse(f.read()))
            except ValueError as vex:
                LOG.error(_LE('Failed to parse %s'), fname)
                LOG.exception(six.text_type(vex))
            except IOError as ioex:
                LOG.error(_LE('Failed to read %s'), fname)
                LOG.exception(six.text_type(ioex))
Example #37
0
    def member_remove(self, lb_id, pool_id, member_id):
        """Delete a member from Neutron lbaas pool.

        :param lb_id: The ID of the loadbalancer the operation is targeted at;
        :param pool_id: The ID of the pool from which the member is deleted;
        :param member_id: The ID of the LB member.
        :returns: True if the operation succeeded, or None if errors occurred.
        """
        try:
            self.nc().pool_member_delete(pool_id, member_id)
        except exception.InternalError as ex:
            msg = _LE('Failed in removing member %(m)s from pool %(p)s: '
                      '%(ex)s') % {'m': member_id, 'p': pool_id,
                                   'ex': six.text_type(ex)}
            LOG.exception(msg)
            EVENT.warning(oslo_context.get_current(), self,
                          'POOL_MEMBER_DELETE', 'ERROR', msg)
            return None
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            LOG.error(_LE('Failed in deleting pool member (%s).') % member_id)
            return None

        return True
Example #38
0
    def read_global_environment(self):
        '''Read and parse global environment files.'''

        cfg.CONF.import_opt('environment_dir', 'senlin.common.config')
        env_dir = cfg.CONF.environment_dir

        try:
            files = glob.glob(os.path.join(env_dir, '*'))
        except OSError as ex:
            LOG.error(_LE('Failed to read %s'), env_dir)
            LOG.exception(ex)
            return

        for fname in files:
            try:
                with open(fname) as f:
                    LOG.info(_LI('Loading environment from %s'), fname)
                    self.load(self.parse(f.read()))
            except ValueError as vex:
                LOG.error(_LE('Failed to parse %s'), fname)
                LOG.exception(six.text_type(vex))
            except IOError as ioex:
                LOG.error(_LE('Failed to read %s'), fname)
                LOG.exception(six.text_type(ioex))
Example #39
0
    def __init__(self, **kwargs):
        self.kwargs = kwargs

        try:
            self.message = self.msg_fmt % kwargs
        except KeyError:
            # exc_info = sys.exc_info()
            # if kwargs doesn't match a variable in the message
            # log the issue and the kwargs
            LOG.exception(_LE('Exception in string format operation'))
            for name, value in six.iteritems(kwargs):
                LOG.error("%s: %s" % (name, value))  # noqa

            if _FATAL_EXCEPTION_FORMAT_ERRORS:
                raise
Example #40
0
    def __init__(self, **kwargs):
        self.kwargs = kwargs

        try:
            self.message = self.msg_fmt % kwargs
        except KeyError:
            # exc_info = sys.exc_info()
            # if kwargs doesn't match a variable in the message
            # log the issue and the kwargs
            LOG.exception(_LE('Exception in string format operation'))
            for name, value in six.iteritems(kwargs):
                LOG.error("%s: %s" % (name, value))  # noqa

            if _FATAL_EXCEPTION_FORMAT_ERRORS:
                raise
Example #41
0
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False):
    """Try to lock the specified node.

    :param context: the context used for DB operations;
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param engine: ID of the engine that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Step 1: try to lock the node - if the returned owner_id is the
    #         action id, it was a success
    owner = nl_obj.NodeLock.acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = nl_obj.NodeLock.steal(node_id, action_id)
        return action_id == owner

    # Step 3: Try to steal the lock if its owner is a dead engine,
    # i.e. the node is currently locked by an action whose engine has died.
    action = ao.Action.get(context, owner)
    if (action and action.owner and action.owner != engine
            and utils.is_engine_dead(context, action.owner)):
        LOG.info(
            _LI('The node %(n)s is locked by dead action %(a)s, '
                'try to steal the lock.'), {
                    'n': node_id,
                    'a': owner
                })
        reason = _('Engine died when executing this action.')
        nl_obj.NodeLock.steal(node_id, action_id)
        ao.Action.mark_failed(context, action.id, time.time(), reason)
        return True

    LOG.error(
        _LE('Node is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock'), {
                'old': owner,
                'new': action_id
            })

    return False
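
A minimal usage sketch for the helper above (the release call and the
surrounding identifiers are assumptions, not part of the example):

    # Hedged sketch: guard a node operation with node_lock_acquire();
    # nl_obj.NodeLock.release() is assumed to be the matching release call.
    if node_lock_acquire(context, node_id, action_id, engine_id, forced=False):
        try:
            do_something_with_node(context, node_id)  # hypothetical operation
        finally:
            nl_obj.NodeLock.release(node_id, action_id)
    else:
        LOG.error(_LE('Could not lock node %s.'), node_id)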
Example #42
0
    def wait_on_children(self):
        while self.running:
            try:
                pid, status = os.wait()
                if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                    self.LOG.error(_LE('Removing dead child %s') % pid)
                    self.children.remove(pid)
                    self.run_child()
            except OSError as err:
                if err.errno not in (errno.EINTR, errno.ECHILD):
                    raise
            except KeyboardInterrupt:
                self.LOG.info(_LI('Caught keyboard interrupt. Exiting.'))
                os.killpg(0, signal.SIGTERM)
                break
        eventlet.greenio.shutdown_safe(self.sock)
        self.sock.close()
        self.LOG.debug('Exited')
Example #43
0
    def __init__(self, **kwargs):
        self.kwargs = kwargs

        try:
            self.message = self.msg_fmt % kwargs
            # if last char is '.', wipe out redundant '.'
            if self.message[-1] == '.':
                self.message = self.message.rstrip('.') + '.'
        except KeyError:
            # exc_info = sys.exc_info()
            # if kwargs doesn't match a variable in the message
            # log the issue and the kwargs
            LOG.exception(_LE('Exception in string format operation'))
            for name, value in kwargs.items():
                LOG.error("%s: %s" % (name, value))  # noqa

            if _FATAL_EXCEPTION_FORMAT_ERRORS:
                raise
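
To make the pattern above concrete, a hedged sketch of a subclass supplying
msg_fmt (both class names below are hypothetical, not taken from the source):

    class NodeNotFound(SenlinException):  # hypothetical base and subclass
        msg_fmt = _("The node (%(node)s) could not be found.")

    # A matching keyword argument formats cleanly; a missing or misspelled
    # one hits the KeyError branch above, which logs every kwarg received.
    raise NodeNotFound(node='node-1234')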
Example #44
0
    def do_create(self, context):
        if self.status != self.INIT:
            LOG.error(_LE('Node is in status "%s"'), self.status)
            return False
        self.set_status(context, self.CREATING, reason='Creation in progress')
        event_mod.info(context, self, 'create')
        try:
            physical_id = profile_base.Profile.create_object(context, self)
        except exception.InternalError as ex:
            self._handle_exception(context, 'create', self.ERROR, ex)
            return False
        if not physical_id:
            return False

        status_reason = 'Creation succeeded'
        self.set_status(context, self.ACTIVE, status_reason)
        self.physical_id = physical_id
        self.store(context)
        return True
Example #45
0
def _dump(level, action, phase, reason, timestamp):
    global dispatchers

    if timestamp is None:
        timestamp = timeutils.utcnow(True)

    # We check the logging level threshold only when debug is False
    if cfg.CONF.debug is False:
        watermark = cfg.CONF.dispatchers.priority.upper()
        bound = consts.EVENT_LEVELS.get(watermark, logging.INFO)
        if level < bound:
            return

    try:
        dispatchers.map_method("dump", level, action,
                               phase=phase, reason=reason, timestamp=timestamp)
    except Exception as ex:
        LOG.exception(_LE("Dispatcher failed to handle the event: %s"),
                      six.text_type(ex))
Example #46
0
    def do_create(self, context):
        if self.status != self.INIT:
            LOG.error(_LE('Node is in status "%s"'), self.status)
            return False
        self.set_status(context, self.CREATING, reason='Creation in progress')
        event_mod.info(context, self, 'create')
        try:
            physical_id = profile_base.Profile.create_object(context, self)
        except exception.InternalError as ex:
            self._handle_exception(context, 'create', self.ERROR, ex)
            return False
        if not physical_id:
            return False

        status_reason = 'Creation succeeded'
        self.set_status(context, self.ACTIVE, status_reason)
        self.physical_id = physical_id
        self.store(context)
        return True
Example #47
0
    def do_create(self, context):
        if self.status != self.INIT:
            LOG.error(_LE('Node is in status "%s"'), self.status)
            return False
        self.set_status(context, self.CREATING, reason='Creation in progress')
        event_mod.info(context, self, 'create')
        physical_id = profile_base.Profile.create_object(context, self)
        if not physical_id:
            return False

        if self.cluster_id is not None:
            self.index = db_api.cluster_get_next_index(context,
                                                       self.cluster_id)

        self.physical_id = physical_id
        self.created_time = datetime.datetime.utcnow()
        self.status = self.ACTIVE
        self.status_reason = 'Creation succeeded'
        self.store(context)
        return True
Example #48
0
    def do_create(self, context):
        if self.status != consts.NS_INIT:
            LOG.error(_LE('Node is in status "%s"'), self.status)
            return False

        self.set_status(context, consts.NS_CREATING, _('Creation in progress'))
        try:
            physical_id = pb.Profile.create_object(context, self)
        except exc.EResourceCreation as ex:
            physical_id = ex.resource_id
            self.set_status(context,
                            consts.NS_ERROR,
                            six.text_type(ex),
                            physical_id=physical_id)
            return False

        self.set_status(context,
                        consts.NS_ACTIVE,
                        _('Creation succeeded'),
                        physical_id=physical_id)
        return True
Example #49
0
    def do_recover(self, obj, **options):
        """Default recover operation.

        :param obj: The node object to operate on.
        :param options: Keyword arguments for the recover operation.
        """
        operation = options.get('operation', None)
        if operation and operation != consts.RECOVER_RECREATE:
            LOG.error(_LE("Recover operation not supported: %s"), operation)
            return False

        res = self.do_delete(obj)
        if res:
            try:
                res = self.do_create(obj)
            except Exception as ex:
                LOG.exception(_('Failed at recovering obj: %s '),
                              six.text_type(ex))
                return False

        return res
Example #50
0
    def do_recover(self, obj, **options):
        """Default recover operation.

        :param obj: The node object to operate on.
        :param options: Keyword arguments for the recover operation.
        """
        operation = options.get('operation', None)
        if operation and operation != consts.RECOVER_RECREATE:
            LOG.error(_LE("Recover operation not supported: %s"), operation)
            return False

        res = self.do_delete(obj)
        if res:
            try:
                res = self.do_create(obj)
            except Exception as ex:
                LOG.exception(_('Failed at recovering obj: %s '),
                              six.text_type(ex))
                return False

        return res
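
An illustrative call into this default recover path (the profile and node
objects are assumptions; only RECREATE is accepted, per the check above):

    # Hedged sketch: any operation other than RECOVER_RECREATE returns False.
    res = profile.do_recover(node_obj, operation=consts.RECOVER_RECREATE)
    if not res:
        LOG.error(_LE('Recovery of node %s failed.'), node_obj.id)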
Example #51
0
def cluster_lock_acquire(cluster_id, action_id, scope=CLUSTER_SCOPE,
                         forced=False):
    """Try to lock the specified cluster.

    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action which wants to lock the cluster.
    :param scope: scope of lock, could be cluster wide lock, or node-wide
                  lock.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """

    # Step 1: try to lock the cluster - if the returned owner_id is the
    #         action id, it was a success
    owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
    if action_id in owners:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
        if action_id in owners:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    LOG.error(_LE('Cluster is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': str(owners), 'new': action_id})

    return False
Example #52
0
    def pre_op(self, cluster_id, action):
        """Callback function when new nodes are to be created for a cluster.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        pd = action.data.get('creation', {})
        if pd:
            count = pd.get('count', 1)
        else:
            # If no scaling policy is attached, use the input count directly
            count = action.inputs.get('count', 1)

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        zones = self._validate_zones(cluster)
        if len(zones) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No availability zone found available.')
            LOG.error(_LE('No availability zone found available.'))
            return

        # Calculate AZ distribution for existing nodes
        current_dist = self._get_current_dist(action.context, zones, cluster)
        # Calculate placement plan for new nodes
        plan = self._create_plan(current_dist, zones, count)

        placement = action.data.get('placement', {})
        placement['count'] = count
        placement['placements'] = []

        for az, count in plan.items():
            if count > 0:
                entry = {'zone': az}
                placement['placements'].extend([entry] * count)

        action.data.update({'placement': placement})

        return
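
For reference, the placement data written by this pre_op has roughly the
following shape (zone names and the count are illustrative only):

    # Illustrative action.data['placement'] after pre_op with count == 3:
    # {
    #     'count': 3,
    #     'placements': [{'zone': 'az-1'}, {'zone': 'az-1'}, {'zone': 'az-2'}],
    # }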
Example #53
0
    def _wait_for_lb_ready(self, lb_id, timeout=60, ignore_not_found=False):
        """Keep waiting until loadbalancer is ready

        This method will keep waiting until loadbalancer resource specified
        by lb_id becomes ready, i.e. its provisioning_status is ACTIVE and
        its operating_status is ONLINE.

        :param lb_id: ID of the load-balancer to check.
        :param timeout: timeout in seconds.
        :param ignore_not_found: if set to True, nonexistent loadbalancer
            resource is also an acceptable result.
        """
        waited = 0
        while waited < timeout:
            try:
                lb = self.nc().loadbalancer_get(lb_id)
            except exception.InternalError as ex:
                msg = _LE('Failed in getting loadbalancer: %s.'
                          ) % six.text_type(ex)
                LOG.exception(msg)
                EVENT.warning(oslo_context.get_current(), self, 'LB_GET',
                              'ERROR', msg)
                return False
            if lb is None:
                lb_ready = ignore_not_found
            else:
                lb_ready = ((lb.provisioning_status == 'ACTIVE') and
                            (lb.operating_status == 'ONLINE'))

            if lb_ready is True:
                return True

            LOG.debug(_('Waiting for loadbalancer %(lb)s to become ready'),
                      {'lb': lb_id})

            eventlet.sleep(2)
            waited += 2

        return False
Example #54
0
    def _wait_for_lb_ready(self, lb_id, timeout=60, ignore_not_found=False):
        """Keep waiting until loadbalancer is ready

        This method will keep waiting until loadbalancer resource specified
        by lb_id becomes ready, i.e. its provisioning_status is ACTIVE and
        its operating_status is ONLINE.

        :param lb_id: ID of the load-balancer to check.
        :param timeout: timeout in seconds.
        :param ignore_not_found: if set to True, nonexistent loadbalancer
            resource is also an acceptable result.
        """
        waited = 0
        while waited < timeout:
            try:
                lb = self.nc().loadbalancer_get(lb_id)
            except exception.InternalError as ex:
                msg = _LE('Failed in getting loadbalancer: %s.'
                          ) % six.text_type(ex)
                LOG.exception(msg)
                return False
            if lb is None:
                lb_ready = ignore_not_found
            else:
                lb_ready = ((lb.provisioning_status == 'ACTIVE') and
                            (lb.operating_status == 'ONLINE'))

            if lb_ready is True:
                return True

            LOG.debug(_('Waiting for loadbalancer %(lb)s to become ready'),
                      {'lb': lb_id})

            eventlet.sleep(2)
            waited += 2

        return False
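
A hedged sketch of how this polling helper is typically wrapped around an
LBaaS call (mirroring member_remove earlier; the identifiers are assumed):

    # Wait for the LB to settle before and after the mutating call.
    if not self._wait_for_lb_ready(lb_id, timeout=120):
        raise exception.Error(_('Loadbalancer %s is not ready.') % lb_id)
    self.nc().pool_member_delete(pool_id, member_id)
    if not self._wait_for_lb_ready(lb_id):
        LOG.error(_LE('Loadbalancer %s did not become ready.'), lb_id)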
Example #55
0
    def do_check(self, obj):
        """Check stack status.

        :param obj: Node object to operate.
        :returns: True if check succeeded, or False otherwise.
        """
        stack_id = obj.physical_id
        if stack_id is None:
            return False

        hc = self.orchestration(obj)
        try:
            # Timeout = None means we will use the 'default_action_timeout'.
            # It can be overridden by the TIMEOUT profile property.
            timeout = None
            if self.properties[self.TIMEOUT]:
                timeout = self.properties[self.TIMEOUT] * 60
            hc.stack_check(stack_id)
            hc.wait_for_stack(stack_id, 'CHECK_COMPLETE', timeout=timeout)
        except exc.InternalError as ex:
            LOG.error(_LE('Failed in checking stack: %s.'), ex)
            return False

        return True
Example #56
0
    def signal(self, cmd):
        """Send a signal to the action.

        :param cmd: One of the command words defined in self.COMMANDS.
        :returns: None
        """
        if cmd not in self.COMMANDS:
            return

        if cmd == self.SIG_CANCEL:
            expected = (self.INIT, self.WAITING, self.READY, self.RUNNING)
        elif cmd == self.SIG_SUSPEND:
            expected = (self.RUNNING,)
        else:  # SIG_RESUME
            expected = (self.SUSPENDED,)

        if self.status not in expected:
            LOG.error(
                _LE("Action (%(id)s) is in status (%(actual)s) while "
                    "expected status should be one of (%(expected)s)."),
                dict(id=self.id[:8], expected=expected, actual=self.status))
            return

        ao.Action.signal(self.context, self.id, cmd)
Example #57
0
def log_exception(err, exc_info):
    args = {'exc_info': exc_info} if cfg.CONF.verbose or cfg.CONF.debug else {}
    logging.error(_LE("Unexpected error occurred serving API: %s") % err,
                  **args)
Example #58
0
    def attach(self, cluster):
        """Routine to be invoked when policy is to be attached to a cluster.

        :param cluster: The target cluster to attach to;
        :returns: When the operation was successful, returns a tuple (True,
                  message); otherwise, return a tuple (False, error).
        """
        res, data = super(AffinityPolicy, self).attach(cluster)
        if res is False:
            return False, data

        data = {'inherited_group': False}
        nc = self.nova(cluster)
        group = self.properties.get(self.SERVER_GROUP)

        # guess servergroup name
        group_name = group.get(self.GROUP_NAME, None)

        if group_name is None:
            profile = cluster.rt['profile']
            if 'scheduler_hints' in profile.spec:
                hints = profile.spec['scheduler_hints']
                group_name = hints.get('group', None)

        if group_name:
            try:
                server_group = nc.find_server_group(group_name, True)
            except exception.InternalError as ex:
                msg = _("Failed in retrieving servergroup '%s'.") % group_name
                LOG.exception(
                    _LE('%(msg)s: %(ex)s') % {
                        'msg': msg,
                        'ex': six.text_type(ex)
                    })
                return False, msg

            if server_group:
                # Check if the policies match
                policies = group.get(self.GROUP_POLICIES)
                if policies and policies != server_group.policies[0]:
                    msg = _(
                        "Policies specified (%(specified)s) doesn't match "
                        "that of the existing servergroup (%(existing)s).") % {
                            'specified': policies,
                            'existing': server_group.policies[0]
                        }
                    return False, msg

                data['servergroup_id'] = server_group.id
                data['inherited_group'] = True

        if not data['inherited_group']:
            # create a random name if necessary
            if not group_name:
                group_name = 'server_group_%s' % utils.random_name()
            try:
                server_group = nc.create_server_group(
                    name=group_name, policies=[group.get(self.GROUP_POLICIES)])
            except Exception as ex:
                msg = _('Failed in creating servergroup.')
                LOG.exception(
                    _LE('%(msg)s: %(ex)s') % {
                        'msg': msg,
                        'ex': six.text_type(ex)
                    })
                return False, msg

            data['servergroup_id'] = server_group.id

        policy_data = self._build_policy_data(data)

        return True, policy_data
Example #59
0
    def lb_create(self, vip, pool, hm=None):
        """Create a LBaaS instance

        :param vip: A dict containing the properties for the VIP;
        :param pool: A dict describing the pool of load-balancer members.
        :param hm: A dict describing the health monitor.
        """
        def _cleanup(msg, **kwargs):
            LOG.error(msg)
            self.lb_delete(**kwargs)
            return

        result = {}
        # Create loadbalancer
        try:
            subnet = self.nc().subnet_get(vip['subnet'])
        except exception.InternalError as ex:
            msg = _LE('Failed in getting subnet: %s.') % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        subnet_id = subnet.id
        try:
            lb = self.nc().loadbalancer_create(subnet_id,
                                               vip.get('address', None),
                                               vip['admin_state_up'])
        except exception.InternalError as ex:
            msg = _LE('Failed in creating loadbalancer: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        result['loadbalancer'] = lb.id
        result['vip_address'] = lb.vip_address

        res = self._wait_for_lb_ready(lb.id)
        if res is False:
            msg = _LE('Failed in creating load balancer (%s).') % lb.id
            del result['vip_address']
            _cleanup(msg, **result)
            return False, msg

        # Create listener
        try:
            listener = self.nc().listener_create(lb.id, vip['protocol'],
                                                 vip['protocol_port'],
                                                 vip.get('connection_limit',
                                                         None),
                                                 vip['admin_state_up'])
        except exception.InternalError as ex:
            msg = _LE('Failed in creating lb listener: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        result['listener'] = listener.id
        res = self._wait_for_lb_ready(lb.id)
        if res is False:
            msg = _LE('Failed in creating listener (%s).') % listener.id
            del result['vip_address']
            _cleanup(msg, **result)
            return res, msg

        # Create pool
        try:
            pool = self.nc().pool_create(pool['lb_method'], listener.id,
                                         pool['protocol'],
                                         pool['admin_state_up'])
        except exception.InternalError as ex:
            msg = _LE('Failed in creating lb pool: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        result['pool'] = pool.id
        res = self._wait_for_lb_ready(lb.id)
        if res is False:
            msg = _LE('Failed in creating pool (%s).') % pool.id
            del result['vip_address']
            _cleanup(msg, **result)
            return res, msg

        if not hm:
            return True, result

        # Create health monitor
        try:
            health_monitor = self.nc().healthmonitor_create(
                hm['type'], hm['delay'], hm['timeout'], hm['max_retries'],
                pool.id, hm['admin_state_up'], hm['http_method'],
                hm['url_path'], hm['expected_codes'])
        except exception.InternalError as ex:
            msg = _LE('Failed in creating lb health monitor: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        result['healthmonitor'] = health_monitor.id
        res = self._wait_for_lb_ready(lb.id)
        if res is False:
            msg = _LE('Failed in creating health monitor (%s).'
                      ) % health_monitor.id
            del result['vip_address']
            _cleanup(msg, **result)
            return res, msg

        return True, result
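
To show the dictionary shapes lb_create reads, a hedged example call; every
value below is an assumption, chosen only to match the keys accessed above:

    vip = {'subnet': 'private-subnet', 'address': None, 'admin_state_up': True,
           'protocol': 'HTTP', 'protocol_port': 80, 'connection_limit': None}
    pool = {'lb_method': 'ROUND_ROBIN', 'protocol': 'HTTP',
            'admin_state_up': True}
    hm = {'type': 'PING', 'delay': 5, 'timeout': 5, 'max_retries': 3,
          'admin_state_up': True, 'http_method': 'GET', 'url_path': '/',
          'expected_codes': '200'}
    ok, result = driver.lb_create(vip, pool, hm=hm)  # driver is assumed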