def recover_delayed_executions():
    """
    Reschedule liveactions which have been sitting in the delayed status for too long.

    Liveactions in ``LIVEACTION_STATUS_DELAYED`` whose ``start_timestamp`` is older than
    ``cfg.CONF.scheduler.delayed_execution_recovery`` seconds are moved back to
    ``LIVEACTION_STATUS_REQUESTED`` and the status change is published. The query and the
    updates are performed while holding the 'st2-rescheduling-delayed-executions'
    coordinator lock.
    """
    coordinator = coordination.get_coordinator()
    dt_now = date_utils.get_datetime_utc_now()
    dt_delta = datetime.timedelta(seconds=cfg.CONF.scheduler.delayed_execution_recovery)
    dt_timeout = dt_now - dt_delta

    with coordinator.get_lock('st2-rescheduling-delayed-executions'):
        liveactions = LiveAction.query(
            status=action_constants.LIVEACTION_STATUS_DELAYED,
            start_timestamp__lte=dt_timeout,
            order_by=['start_timestamp'])

        if not liveactions:
            return

        total = len(liveactions)

        LOG.info('There are %d liveactions that have been delayed for longer than %d seconds.',
                 total, cfg.CONF.scheduler.delayed_execution_recovery)

        # Update status to requested and publish status for each liveactions.
        rescheduled = 0

        for instance in liveactions:
            try:
                action_service.update_status(instance,
                                             action_constants.LIVEACTION_STATUS_REQUESTED,
                                             publish=True)
                rescheduled += 1
            except Exception:
                # Best effort: a failure on one liveaction must not prevent the remaining
                # ones from being rescheduled. Only ``Exception`` is caught so that
                # interpreter / green thread exit signals still propagate.
                LOG.exception('Unable to reschedule liveaction. <LiveAction.id=%s>',
                              instance.id)

        # The message reads "<rescheduled> out of <total>", so the counters are passed in
        # that order.
        LOG.info('Rescheduled %d out of %d delayed liveactions.', rescheduled, total)
def __init__(self):
    """Initialize instance state: message type, green pool, coordinator and thread slots."""
    self.message_type = LiveActionDB

    # Shutdown flag starts cleared.
    self._shutdown = False

    # Green pool sized from the scheduler configuration.
    pool_size = cfg.CONF.scheduler.pool_size
    self._pool = eventlet.GreenPool(size=pool_size)

    self._coordinator = coordination_service.get_coordinator()

    # Thread references are not populated here.
    self._main_thread, self._cleanup_thread = None, None
def __init__(self, policy_ref, policy_type, threshold=0, action='delay'):
    """
    Store the concurrency policy settings and obtain a coordinator.

    :param policy_ref: Forwarded to the parent class constructor.
    :param policy_type: Forwarded to the parent class constructor.
    :param threshold: Value stored on ``self.threshold``.
    :param action: Value stored on ``self.policy_action``.
    """
    parent = super(BaseConcurrencyApplicator, self)
    parent.__init__(policy_ref=policy_ref, policy_type=policy_type)

    self.threshold = threshold
    self.policy_action = action

    coordinator = coordination.get_coordinator()
    self.coordinator = coordinator
def __init__(self):
    """
    Initialize instance state: message type, green pool, coordinator and thread slots.

    The coordinator is requested with ``start_heart=True``.
    """
    self.message_type = LiveActionDB

    # Shutdown flag starts cleared.
    self._shutdown = False

    # Green pool sized from the scheduler configuration.
    pool_size = cfg.CONF.scheduler.pool_size
    self._pool = eventlet.GreenPool(size=pool_size)

    self._coordinator = coordination_service.get_coordinator(start_heart=True)

    # Thread references are not populated here.
    self._main_thread, self._cleanup_thread = None, None
def register_service_in_service_registry(service, capabilities=None, start_heart=True):
    """
    Register provided service in the service registry and start the heartbeat process.

    :param service: Service name which will also be used for a group name (e.g. "api").
    :type service: ``str``

    :param capabilities: Optional metadata associated with the service. When a dictionary
                         is provided it is updated in place with the "hostname" and "pid"
                         keys. When omitted, a new dictionary holding only those keys is
                         used.
    :type capabilities: ``dict``

    :param start_heart: Value passed through to ``coordination.get_coordinator``.
    :type start_heart: ``bool``

    :return: Result of waiting on the coordinator's ``join_group`` request.
    """
    # NOTE: It's important that we pass start_heart=True to start the heartbeat process
    coordinator = coordination.get_coordinator(start_heart=start_heart)

    member_id = coordination.get_member_id()

    # 1. Create a group with the name of the service
    if not isinstance(service, six.binary_type):
        group_id = service.encode('utf-8')
    else:
        group_id = service

    try:
        coordinator.create_group(group_id).get()
    except GroupAlreadyExist:
        # Group already exists, so there is nothing to create.
        pass

    # The default is None, which can't be subscripted - fall back to an empty dictionary so
    # the function can be called without explicit capabilities.
    if capabilities is None:
        capabilities = {}

    # Include common capabilities such as hostname and process ID
    proc_info = system_info.get_process_info()
    capabilities['hostname'] = proc_info['hostname']
    capabilities['pid'] = proc_info['pid']

    # 2. Join the group as a member
    LOG.debug('Joining service registry group "%s" as member_id "%s" with capabilities "%s"' %
              (group_id, member_id, capabilities))
    return coordinator.join_group(group_id, capabilities=capabilities).get()
def get_one(self, group_id, requester_user):
    """
    Return the members of a single service registry group with their capabilities.

    :param group_id: Group identifier; text values are encoded as UTF-8 before use.
    :param requester_user: User performing the request, checked for admin access.

    :return: Dictionary with a "members" list; each entry holds "group_id", "member_id"
             and "capabilities".
    :raises StackStormDBObjectNotFoundError: If the coordinator reports ``GroupNotCreated``.
    """
    get_rbac_backend().get_utils_class().assert_user_is_admin(user_db=requester_user)

    coordinator = coordination.get_coordinator()

    # Coordinator calls below are made with a binary group ID.
    if not isinstance(group_id, six.binary_type):
        group_id = group_id.encode('utf-8')

    try:
        members_request = coordinator.get_members(group_id)
        member_ids = list(members_request.get())
    except GroupNotCreated:
        group_name = group_id.decode('utf-8')
        msg = ('Group with ID "%s" not found.' % (group_name))
        raise StackStormDBObjectNotFoundError(msg)

    members = []

    for member_id in member_ids:
        capabilities_request = coordinator.get_member_capabilities(group_id, member_id)
        members.append({
            'group_id': group_id.decode('utf-8'),
            'member_id': member_id.decode('utf-8'),
            'capabilities': capabilities_request.get()
        })

    return {'members': members}
def __init__(self, policy_ref, policy_type, threshold=0, action='delay'):
    """
    Store the concurrency policy settings and obtain a coordinator.

    :param policy_ref: Forwarded to the parent class constructor.
    :param policy_type: Forwarded to the parent class constructor.
    :param threshold: Value stored on ``self.threshold``.
    :param action: Value stored on ``self.policy_action``.
    """
    parent = super(BaseConcurrencyApplicator, self)
    parent.__init__(policy_ref=policy_ref, policy_type=policy_type)

    self.threshold = threshold
    self.policy_action = action

    # The coordinator is requested with start_heart=True.
    coordinator = coordination.get_coordinator(start_heart=True)
    self.coordinator = coordinator
def __init__(self):
    """
    Initialize instance state: message type, green pool, timeout threshold, coordinator
    and thread slots.
    """
    self.message_type = LiveActionDB

    # Shutdown flag starts cleared.
    self._shutdown = False

    pool_size = cfg.CONF.scheduler.pool_size
    self._pool = eventlet.GreenPool(size=pool_size)

    # NOTE: Despite the "_min" suffix of the attribute, the stored value is the configured
    # number of minutes multiplied by 60 * 1000.
    threshold_min = cfg.CONF.scheduler.execution_scheduling_timeout_threshold_min
    self._execution_scheduling_timeout_threshold_min = threshold_min * 60 * 1000

    self._coordinator = coordination_service.get_coordinator(start_heart=True)

    # Thread references are not populated here.
    self._main_thread, self._cleanup_thread = None, None
def get_all(self, requester_user):
    """
    Return the IDs of all groups known to the coordinator.

    :param requester_user: User performing the request, checked for admin access.

    :return: Dictionary with a "groups" key holding the UTF-8 decoded group IDs.
    """
    get_rbac_backend().get_utils_class().assert_user_is_admin(user_db=requester_user)

    coordinator = coordination.get_coordinator()

    # Materialize the coordinator response first, then decode each ID.
    raw_group_ids = list(coordinator.get_groups().get())

    decoded_group_ids = []
    for raw_group_id in raw_group_ids:
        decoded_group_ids.append(raw_group_id.decode('utf-8'))

    return {'groups': decoded_group_ids}
def setUpClass(cls):
    """Parse the test config, create a coordinator and register a mock service."""
    tests_config.parse_args(coordinator_noop=True)

    super(ServiceRegistryControllerRBACTestCase, cls).setUpClass()

    cls.coordinator = coordination.get_coordinator(use_cache=False)

    # Register mock service in the service registry for testing purposes
    mock_capabilities = {'key1': 'value1', 'name': 'mock_service'}
    register_service_in_service_registry(
        service='mock_service',
        capabilities=mock_capabilities,
        start_heart=True
    )
def test_deregister_service_when_service_registry_enabled(self):
    """Register a service, verify one member is listed, deregister and verify none remain."""
    service = "api"
    initial_capabilities = {"hostname": "", "pid": ""}

    service_setup.register_service_in_service_registry(
        service, capabilities=initial_capabilities
    )

    coordinator = coordination.get_coordinator(start_heart=True)
    group_id = service.encode("utf-8")
    members = coordinator.get_members(group_id)

    # Exactly one member is expected after registration.
    members_before = list(members.get())
    self.assertEqual(len(members_before), 1)

    service_setup.deregister_service(service)

    # The same request object is queried again after the service left the group.
    members_after = list(members.get())
    self.assertEqual(len(members_after), 0)
def setUpClass(cls):
    """Parse the test config, create a coordinator and register a mock service."""
    tests_config.parse_args(coordinator_noop=True)

    super(APIControllersRBACTestCase, cls).setUpClass()

    cls.coordinator = coordination.get_coordinator(use_cache=False)

    # Register mock service in the service registry for testing purposes
    # The service name is passed in as a binary value.
    service_name = six.text_type('mock_service')
    service = six.binary_type(service_name.encode('ascii'))
    mock_capabilities = {'key1': 'value1', 'name': 'mock_service'}

    register_service_in_service_registry(
        service=service,
        capabilities=mock_capabilities,
        start_heart=True
    )
def setUpClass(cls):
    """Run the parent setup, parse the test config and register a mock service."""
    super(ServiceyRegistryControllerTestCase, cls).setUpClass()

    tests_config.parse_args(coordinator_noop=True)

    cls.coordinator = coordination.get_coordinator(use_cache=False)

    # NOTE: We mock call common_setup to emulate service being registered in the service
    # registry during bootstrap phase
    mock_capabilities = {'key1': 'value1', 'name': 'mock_service'}
    register_service_in_service_registry(
        service='mock_service',
        capabilities=mock_capabilities,
        start_heart=True
    )
def get_all(self, requester_user):
    """
    Return the IDs of all groups known to the coordinator.

    :param requester_user: User performing the request, checked for admin access.

    :return: Dictionary with a "groups" key holding the UTF-8 decoded group IDs.
    """
    get_rbac_backend().get_utils_class().assert_user_is_admin(user_db=requester_user)

    coordinator = coordination.get_coordinator()

    # Materialize the coordinator response first, then decode each ID.
    raw_group_ids = list(coordinator.get_groups().get())

    decoded_group_ids = []
    for raw_group_id in raw_group_ids:
        decoded_group_ids.append(raw_group_id.decode('utf-8'))

    return {'groups': decoded_group_ids}
def __init__(self):
    """
    Initialize instance state: message type, green pool, timeout threshold, coordinator
    and thread slots.
    """
    self.message_type = LiveActionDB

    # Shutdown flag starts cleared.
    self._shutdown = False

    pool_size = cfg.CONF.scheduler.pool_size
    self._pool = eventlet.GreenPool(size=pool_size)

    # If an ActionExecutionSchedulingQueueItemDB object hasn't been updated for more than
    # this amount of milliseconds, it will be marked as no longer being handled
    # ("handling=False").
    # As soon as an item is picked by scheduler to be processed, it should be processed very
    # fast (< 5 seconds). If an item is still marked as processing, it likely indicates that
    # the scheduler process which was processing that item crashed or similar, so the mark
    # needs to be cleared to allow some other scheduler process to pick the item up.
    threshold_min = cfg.CONF.scheduler.execution_scheduling_timeout_threshold_min
    self._execution_scheduling_timeout_threshold_ms = threshold_min * 60 * 1000

    self._coordinator = coordination_service.get_coordinator(start_heart=True)

    # Thread references are not populated here.
    self._main_thread, self._cleanup_thread = None, None
def deregister_service(service, start_heart=True):
    """
    Leave the service registry group which is named after the provided service.

    :param service: Service name; text values are encoded as UTF-8 to build the group ID.
    :param start_heart: Value passed through to ``coordination.get_coordinator``.
    """
    group_id = service if isinstance(service, six.binary_type) else service.encode("utf-8")

    coordinator = coordination.get_coordinator(start_heart=start_heart)
    member_id = coordination.get_member_id()

    message = 'Leaving service registry group "%s" as member_id "%s"' % (group_id, member_id)
    LOG.debug(message)

    try:
        leave_request = coordinator.leave_group(group_id)
        leave_request.get()
    except (GroupNotCreated, MemberNotJoined):
        # A missing group or a member which never joined is deliberately ignored.
        pass
def handle_action_execution_completion(ac_ex_db):
    """
    Process a completed action execution which belongs to a workflow task.

    Under a coordinator lock keyed by the workflow execution ID, the related task
    execution, task state and workflow execution are updated and the next tasks are
    requested.

    :param ac_ex_db: Action execution record; its ``context['orquesta']`` entry must hold
                     "workflow_execution_id" and "task_execution_id".

    :raises Exception: If the action execution status is not one of
                       ``LIVEACTION_COMPLETED_STATES``.
    """
    # Guard: only completed action executions are handled.
    is_completed = ac_ex_db.status in ac_const.LIVEACTION_COMPLETED_STATES

    if not is_completed:
        error_msg = (
            'Unable to handle completion of action execution. The action execution '
            '"%s" is in "%s" status.' % (str(ac_ex_db.id), ac_ex_db.status)
        )
        raise Exception(error_msg)

    # Identifiers of the related workflow and task execution records.
    workflow_execution_id = ac_ex_db.context['orquesta']['workflow_execution_id']
    task_execution_id = ac_ex_db.context['orquesta']['task_execution_id']

    # All writes below happen while the lock is held.
    coordinator = coord_svc.get_coordinator(start_heart=True)

    with coordinator.get_lock(workflow_execution_id):
        # Records are loaded for logging purposes.
        workflow_ex_db = wf_db_access.WorkflowExecution.get_by_id(workflow_execution_id)
        task_db = wf_db_access.TaskExecution.get_by_id(task_execution_id)

        msg = ('[%s] Handling completion of action execution "%s" '
               'in status "%s" for task "%s", route "%s".')
        LOG.info(
            msg,
            workflow_ex_db.action_execution,
            str(ac_ex_db.id),
            ac_ex_db.status,
            task_db.task_id,
            str(task_db.task_route)
        )

        # A paused task whose action execution was skipped to completion is moved back to
        # running before anything else is updated.
        if task_db.status == ac_const.LIVEACTION_STATUS_PAUSED:
            resume_task_execution(task_execution_id)

        # Task execution record.
        update_task_execution(
            task_execution_id, ac_ex_db.status, ac_ex_db.result, ac_ex_db.context
        )

        # Task flow inside the workflow execution.
        update_task_state(
            task_execution_id,
            ac_ex_db.status,
            ac_ex_result=ac_ex_db.result,
            ac_ex_ctx=ac_ex_db.context.get('orquesta')
        )

        # Next set of tasks, if the workflow execution is not complete.
        request_next_tasks(workflow_ex_db, task_ex_id=task_execution_id)

        # Workflow execution record, if completed.
        update_workflow_execution(workflow_execution_id)
def update_execution(liveaction_db, publish=True, set_result_size=False):
    """
    Update the action execution which corresponds to the provided liveaction.

    :param liveaction_db: Liveaction whose decomposed fields are written to the execution.
    :param publish: Value passed through to ``ActionExecution.update``.
    :param set_result_size: True to calculate size of the serialized result field value and
                            set it on the "result_size" database field.

    :return: The updated execution, or the unchanged execution when it is already in a
             completed state.
    """
    liveaction_id = str(liveaction_db.id)

    # NOTE(review): the execution is loaded before the lock is acquired, so the status
    # check below works on that earlier read - confirm this is intended.
    execution = ActionExecution.get(liveaction__id=liveaction_id)

    lock_name = str(liveaction_db.id).encode()

    with coordination.get_coordinator().get_lock(lock_name):
        # Executions which already completed are left untouched.
        if execution.status in action_constants.LIVEACTION_COMPLETED_STATES:
            LOG.debug(
                "[%s] Action is already in completed state: %s. Skipping execution update to state: %s."
                % (execution.id, execution.status, liveaction_db.status))
            return execution

        # Every decomposed field becomes a "set__<field>" update argument.
        decomposed = _decompose_liveaction(liveaction_db)
        update_kwargs = {"set__" + name: value for name, value in six.iteritems(decomposed)}

        # Note: If the status changes we store this transition in the "log" attribute of
        # action execution
        status_changed = liveaction_db.status != execution.status
        if status_changed:
            update_kwargs["push__log"] = _create_execution_log_entry(liveaction_db.status)

        if set_result_size:
            # With the current ORM abstraction there is no better way to update
            # result_size, so the value is serialized again. That operation is fast -
            # around 7 ms for a 4 MB result dictionary, which is negligible compared to
            # the duration of other DB operations (sub ms range for smaller results).
            with Timer(key="action.executions.calculate_result_size"):
                serialized_result = ActionExecutionDB.result._serialize_field_value(
                    liveaction_db.result)
                update_kwargs["set__result_size"] = len(serialized_result)

        execution = ActionExecution.update(execution, publish=publish, **update_kwargs)
        return execution
def main(group_id=None):
    """
    Print the members (and their capabilities) of one or all coordinator groups.

    :param group_id: Group to inspect. When not provided, all the groups returned by the
                     coordinator are listed first and then each of them is inspected.
    """
    coordinator = coordination.get_coordinator()

    if group_id:
        group_ids = [group_id]
    else:
        raw_group_ids = list(coordinator.get_groups().get())
        group_ids = [raw_id.decode('utf-8') for raw_id in raw_group_ids]

        print('Available groups (%s):' % (len(group_ids)))

        for name in group_ids:
            print(' - %s' % (name))

        print('')

    for name in group_ids:
        raw_member_ids = list(coordinator.get_members(name).get())
        member_ids = [raw_id.decode('utf-8') for raw_id in raw_member_ids]

        print('Members in group "%s" (%s):' % (name, len(member_ids)))

        for member_id in member_ids:
            capabilities_request = coordinator.get_member_capabilities(name, member_id)
            capabilities = capabilities_request.get()
            print(' - %s (capabilities=%s)' % (member_id, str(capabilities)))
def recover_delayed_executions():
    """
    Reschedule liveactions which have been sitting in the delayed status for too long.

    Liveactions in ``LIVEACTION_STATUS_DELAYED`` whose ``start_timestamp`` is older than
    ``cfg.CONF.scheduler.delayed_execution_recovery`` seconds are moved back to
    ``LIVEACTION_STATUS_REQUESTED`` and the status change is published. The query and the
    updates are performed while holding the 'st2-rescheduling-delayed-executions'
    coordinator lock.
    """
    coordinator = coordination.get_coordinator()
    dt_now = date_utils.get_datetime_utc_now()
    dt_delta = datetime.timedelta(
        seconds=cfg.CONF.scheduler.delayed_execution_recovery)
    dt_timeout = dt_now - dt_delta

    with coordinator.get_lock('st2-rescheduling-delayed-executions'):
        liveactions = LiveAction.query(
            status=action_constants.LIVEACTION_STATUS_DELAYED,
            start_timestamp__lte=dt_timeout,
            order_by=['start_timestamp'])

        if not liveactions:
            return

        total = len(liveactions)

        LOG.info(
            'There are %d liveactions that have been delayed for longer than %d seconds.',
            total, cfg.CONF.scheduler.delayed_execution_recovery)

        # Update status to requested and publish status for each liveactions.
        rescheduled = 0

        for instance in liveactions:
            try:
                action_service.update_status(
                    instance,
                    action_constants.LIVEACTION_STATUS_REQUESTED,
                    publish=True)
                rescheduled += 1
            except Exception:
                # Best effort: a failure on one liveaction must not prevent the remaining
                # ones from being rescheduled. Only ``Exception`` is caught so that
                # interpreter / green thread exit signals still propagate.
                LOG.exception(
                    'Unable to reschedule liveaction. <LiveAction.id=%s>',
                    instance.id)

        # The message reads "<rescheduled> out of <total>", so the counters are passed in
        # that order.
        LOG.info('Rescheduled %d out of %d delayed liveactions.',
                 rescheduled, total)
def __init__(self, policy_ref, policy_type, *args, **kwargs):
    """
    Initialize the applicator from the policy reference, type and keyword settings.

    :param policy_ref: Forwarded to the parent class constructor.
    :param policy_type: Forwarded to the parent class constructor.
    :param kwargs: "threshold" (default 0) and "attributes" (default empty list) are read
                   from here; all positional and keyword extras are also forwarded to the
                   parent class constructor.
    """
    parent = super(ConcurrencyByAttributeApplicator, self)
    parent.__init__(policy_ref, policy_type, *args, **kwargs)

    coordinator = coordination.get_coordinator()
    self.coordinator = coordinator

    # Optional settings with their fallbacks.
    self.threshold = kwargs.get("threshold", 0)
    self.attributes = kwargs.get("attributes", [])
def __init__(self):
    """Obtain a coordinator and then run the parent class initialization."""
    # The coordinator is assigned before the parent constructor runs.
    coordinator = coordination.get_coordinator()
    self._coordinator = coordinator

    parent = super(KeyValuePairController, self)
    parent.__init__()
def __init__(self, policy_ref, policy_type, *args, **kwargs):
    """
    Initialize the applicator from the policy reference, type and keyword settings.

    :param policy_ref: Forwarded to the parent class constructor.
    :param policy_type: Forwarded to the parent class constructor.
    :param kwargs: "threshold" (default 0) is read from here; all positional and keyword
                   extras are also forwarded to the parent class constructor.
    """
    parent = super(ConcurrencyApplicator, self)
    parent.__init__(policy_ref, policy_type, *args, **kwargs)

    coordinator = coordination.get_coordinator()
    self.coordinator = coordinator

    # Optional setting with its fallback.
    self.threshold = kwargs.get('threshold', 0)
def __init__(self):
    """Run the parent initialization, obtain a coordinator and set the get-one lookup."""
    parent = super(KeyValuePairController, self)
    parent.__init__()

    coordinator = coordination.get_coordinator()
    self._coordinator = coordinator

    # Single item lookups go through the by-name getter.
    self.get_one_db_method = self._get_by_name
def setUpClass(cls):
    """Run the parent setup, parse the test config and store a coordinator on the class."""
    parent = super(SynchronizationTest, cls)
    parent.setUpClass()

    tests_config.parse_args()

    coordinator = coordination.get_coordinator()
    cls.coordinator = coordinator
def setUpClass(cls):
    """
    Run the parent setup, parse the test config with ``coordinator_noop=False`` and store
    a non-cached coordinator on the class.
    """
    parent = super(SynchronizationTest, cls)
    parent.setUpClass()

    tests_config.parse_args(coordinator_noop=False)

    coordinator = coordination.get_coordinator(use_cache=False)
    cls.coordinator = coordinator