def approve(self, request=None):
    self.status = 'successful'
    self.save()
    self.send_approval_notification('approved')
    self.websocket_emit_status(self.status)
    schedule_task_manager()
    return reverse('api:workflow_approval_approve', kwargs={'pk': self.pk}, request=request)
def deny(self, request=None):
    self.status = 'failed'
    self.save()
    self.send_approval_notification('denied')
    self.websocket_emit_status(self.status)
    schedule_task_manager()
    return reverse('api:workflow_approval_deny', kwargs={'pk': self.pk}, request=request)
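# A minimal usage sketch (illustrative, not part of the model code above):
# approve() and deny() are meant to be called on a WorkflowApproval instance,
# typically from an API view. Both persist the new status, send the approval
# notification, emit the websocket status change, nudge the task manager so
# the workflow can advance past (or fail at) the approval node, and return
# the API detail URL for the action. The pk and request below are hypothetical.
approval = WorkflowApproval.objects.get(pk=42)
url = approval.approve(request=request)   # workflow resumes past this node
# or: url = approval.deny(request=request)  # marks the node as failed instead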
def process_finished_workflow_jobs(self, workflow_jobs):
    result = []
    for workflow_job in workflow_jobs:
        dag = WorkflowDAG(workflow_job)
        status_changed = False
        if workflow_job.cancel_flag:
            logger.debug('Canceling spawned jobs of %s due to cancel flag.', workflow_job.log_format)
            cancel_finished = dag.cancel_node_jobs()
            if cancel_finished:
                logger.info('Marking %s as canceled, all spawned jobs have concluded.', workflow_job.log_format)
                workflow_job.status = 'canceled'
                workflow_job.start_args = ''  # blank field to remove encrypted passwords
                workflow_job.save(update_fields=['status', 'start_args'])
                status_changed = True
        else:
            is_done, has_failed = dag.is_workflow_done()
            if not is_done:
                continue
            logger.info('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
            result.append(workflow_job.id)
            new_status = 'failed' if has_failed else 'successful'
            logger.debug(six.text_type("Transitioning {} to {} status.").format(workflow_job.log_format, new_status))
            workflow_job.status = new_status
            workflow_job.start_args = ''  # blank field to remove encrypted passwords
            workflow_job.save(update_fields=['status', 'start_args'])
            status_changed = True
        if status_changed:
            workflow_job.websocket_emit_status(workflow_job.status)
            if workflow_job.spawned_by_workflow:
                schedule_task_manager()
    return result
def start_task(self, task, instance_group, dependent_tasks=None, instance=None):
    self.subsystem_metrics.inc("task_manager_tasks_started", 1)
    self.start_task_limit -= 1
    if self.start_task_limit == 0:
        # schedule another run immediately after this task manager
        schedule_task_manager()
    from awx.main.tasks.system import handle_work_error, handle_work_success

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            task.send_notification_templates('running')
            logger.debug('Transitioning %s to running status.', task.log_format)
            schedule_task_manager()
        # at this point we already have control/execution nodes selected for the following cases
        else:
            task.instance_group = instance_group
            execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
            logger.debug(
                f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {instance_group.name}{execution_node_msg}.'
            )

    with disable_activity_stream():
        task.celery_task_id = str(uuid.uuid4())
        task.save()
        task.log_lifecycle("waiting")

    def post_commit():
        if task.status != 'failed' and type(task) is not WorkflowJob:
            # Before task is dispatched, ensure that job_event partitions exist
            create_partition(task.event_class._meta.db_table, start=task.created)
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
            )

    task.websocket_emit_status(task.status)  # adds to on_commit
    connection.on_commit(post_commit)
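# Why the dispatch is wrapped in post_commit(): the task manager mutates job
# rows inside a database transaction, and publishing the dispatch message
# before COMMIT could let a worker pick up a job it cannot yet see. Django's
# on_commit hook defers the callback until the transaction commits (and drops
# it on rollback). A minimal standalone sketch of the same pattern, assuming
# a hypothetical dispatch_to_worker() helper:
from django.db import transaction

def save_and_dispatch(job):
    with transaction.atomic():
        job.status = 'waiting'
        job.save()
        # runs only after a successful COMMIT; never runs on rollback
        transaction.on_commit(lambda: dispatch_to_worker(job.pk))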
def process_finished_workflow_jobs(self, workflow_jobs):
    result = []
    for workflow_job in workflow_jobs:
        dag = WorkflowDAG(workflow_job)
        status_changed = False
        if workflow_job.cancel_flag:
            workflow_job.workflow_nodes.filter(do_not_run=False, job__isnull=True).update(do_not_run=True)
            logger.debug('Canceling spawned jobs of %s due to cancel flag.', workflow_job.log_format)
            cancel_finished = dag.cancel_node_jobs()
            if cancel_finished:
                logger.info('Marking %s as canceled, all spawned jobs have concluded.', workflow_job.log_format)
                workflow_job.status = 'canceled'
                workflow_job.start_args = ''  # blank field to remove encrypted passwords
                workflow_job.save(update_fields=['status', 'start_args'])
                status_changed = True
        else:
            workflow_nodes = dag.mark_dnr_nodes()
            for n in workflow_nodes:
                n.save(update_fields=['do_not_run'])
            is_done = dag.is_workflow_done()
            if not is_done:
                continue
            has_failed, reason = dag.has_workflow_failed()
            logger.debug('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
            result.append(workflow_job.id)
            new_status = 'failed' if has_failed else 'successful'
            logger.debug("Transitioning {} to {} status.".format(workflow_job.log_format, new_status))
            update_fields = ['status', 'start_args']
            workflow_job.status = new_status
            if reason:
                logger.info(f'Workflow job {workflow_job.id} failed due to reason: {reason}')
                workflow_job.job_explanation = gettext_noop("No error handling paths found, marking workflow as failed")
                update_fields.append('job_explanation')
            workflow_job.start_args = ''  # blank field to remove encrypted passwords
            workflow_job.save(update_fields=update_fields)
            status_changed = True
        if status_changed:
            workflow_job.websocket_emit_status(workflow_job.status)
            # Operations whose queries rely on modifications made during the atomic scheduling session
            workflow_job.send_notification_templates('succeeded' if workflow_job.status == 'successful' else 'failed')
            if workflow_job.spawned_by_workflow:
                schedule_task_manager()
    return result
def process_finished_workflow_jobs(self, workflow_jobs):
    result = []
    for workflow_job in workflow_jobs:
        dag = WorkflowDAG(workflow_job)
        status_changed = False
        if workflow_job.cancel_flag:
            workflow_job.workflow_nodes.filter(do_not_run=False, job__isnull=True).update(do_not_run=True)
            logger.debug('Canceling spawned jobs of %s due to cancel flag.', workflow_job.log_format)
            cancel_finished = dag.cancel_node_jobs()
            if cancel_finished:
                logger.info('Marking %s as canceled, all spawned jobs have concluded.', workflow_job.log_format)
                workflow_job.status = 'canceled'
                workflow_job.start_args = ''  # blank field to remove encrypted passwords
                workflow_job.save(update_fields=['status', 'start_args'])
                status_changed = True
        else:
            workflow_nodes = dag.mark_dnr_nodes()
            # map() is lazy in Python 3 and would never run the saves; iterate explicitly
            for n in workflow_nodes:
                n.save(update_fields=['do_not_run'])
            is_done = dag.is_workflow_done()
            if not is_done:
                continue
            has_failed, reason = dag.has_workflow_failed()
            logger.info('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
            result.append(workflow_job.id)
            new_status = 'failed' if has_failed else 'successful'
            logger.debug(six.text_type("Transitioning {} to {} status.").format(workflow_job.log_format, new_status))
            update_fields = ['status', 'start_args']
            workflow_job.status = new_status
            if reason:
                logger.info(reason)
                workflow_job.job_explanation = "No error handling paths found, marking workflow as failed"
                update_fields.append('job_explanation')
            workflow_job.start_args = ''  # blank field to remove encrypted passwords
            workflow_job.save(update_fields=update_fields)
            status_changed = True
        if status_changed:
            workflow_job.websocket_emit_status(workflow_job.status)
            if workflow_job.spawned_by_workflow:
                schedule_task_manager()
    return result
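# Note on the mark_dnr_nodes() save loop above: this revision originally used
# map(lambda n: n.save(...), workflow_nodes). In Python 3, map() returns a lazy
# iterator, so the save() calls never execute unless the result is consumed;
# the explicit for-loop is the fix. A self-contained demonstration of the pitfall:
calls = []
lazy = map(lambda x: calls.append(x), [1, 2, 3])
assert calls == []         # nothing has run yet: map() is lazy
list(lazy)                 # forcing iteration finally runs the lambda
assert calls == [1, 2, 3]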
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
    self.start_task_limit -= 1
    if self.start_task_limit == 0:
        # schedule another run immediately after this task manager
        schedule_task_manager()
    from awx.main.tasks import handle_work_error, handle_work_success

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            task.send_notification_templates('running')
            logger.debug('Transitioning %s to running status.', task.log_format)
            schedule_task_manager()
        elif rampart_group.is_container_group:
            task.instance_group = rampart_group
            if task.capacity_type == 'execution':
                # find one real, non-containerized instance with capacity to
                # act as the controller for k8s API interaction
                try:
                    task.controller_node = Instance.choose_online_control_plane_node()
                    task.log_lifecycle("controller_node_chosen")
                except IndexError:
                    logger.warning("No control plane nodes available to run containerized job {}".format(task.log_format))
                    return
            else:
                # project updates and system jobs don't *actually* run in pods, so
                # just pick *any* non-containerized host and use it as the execution node
                task.execution_node = Instance.choose_online_control_plane_node()
                task.log_lifecycle("execution_node_chosen")
                logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
        else:
            task.instance_group = rampart_group
            task.execution_node = instance.hostname
            task.log_lifecycle("execution_node_chosen")
            if instance.node_type == 'execution':
                try:
                    task.controller_node = Instance.choose_online_control_plane_node()
                    task.log_lifecycle("controller_node_chosen")
                except IndexError:
                    logger.warning("No control plane nodes available to manage {}".format(task.log_format))
                    return
            else:
                # control plane nodes will manage jobs locally for performance and resilience
                task.controller_node = task.execution_node
                task.log_lifecycle("controller_node_chosen")
            logger.debug('Submitting job {} to queue {} controlled by {}.'.format(task.log_format, task.execution_node, task.controller_node))

    with disable_activity_stream():
        task.celery_task_id = str(uuid.uuid4())
        task.save()
        task.log_lifecycle("waiting")

    if rampart_group is not None:
        self.consume_capacity(task, rampart_group.name, instance=instance)

    def post_commit():
        if task.status != 'failed' and type(task) is not WorkflowJob:
            # Before task is dispatched, ensure that job_event partitions exist
            create_partition(task.event_class._meta.db_table, start=task.created)
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
            )

    task.websocket_emit_status(task.status)  # adds to on_commit
    connection.on_commit(post_commit)
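# Instance.choose_online_control_plane_node() raises IndexError when no online
# control plane node exists; the code above turns that into a warning and an
# early return, leaving the task unsaved (still pending in the DB) so a later
# task manager run can retry it. A minimal sketch of the same "choose or defer"
# pattern, with the surrounding names assumed from the code above:
def assign_controller_or_defer(task):
    try:
        task.controller_node = Instance.choose_online_control_plane_node()
        return True
    except IndexError:
        logger.warning("No control plane capacity for %s; deferring.", task.log_format)
        return False  # caller leaves the task pending; the next run retries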
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
    from awx.main.tasks import handle_work_error, handle_work_success

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    controller_node = None
    if task.supports_isolation() and rampart_group.controller_id:
        try:
            controller_node = rampart_group.choose_online_controller_node()
        except IndexError:
            logger.debug("No controllers available in group {} to run {}".format(rampart_group.name, task.log_format))
            return

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            task.send_notification_templates('running')
            logger.debug('Transitioning %s to running status.', task.log_format)
            schedule_task_manager()
        elif not task.supports_isolation() and rampart_group.controller_id:
            # non-Ansible jobs on isolated instances run on controller
            task.instance_group = rampart_group.controller
            task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
            logger.debug('Submitting isolated {} to queue {} on node {}.'.format(task.log_format, task.instance_group.name, task.execution_node))
        elif controller_node:
            task.instance_group = rampart_group
            task.execution_node = instance.hostname
            task.controller_node = controller_node
            logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format(task.log_format, task.execution_node, controller_node))
        elif rampart_group.is_containerized:
            # find one real, non-containerized instance with capacity to
            # act as the controller for k8s API interaction
            match = None
            for group in InstanceGroup.objects.all():
                if group.is_containerized or group.controller_id:
                    continue
                match = group.fit_task_to_most_remaining_capacity_instance(task)
                if match:
                    break
            task.instance_group = rampart_group
            if match is None:
                logger.warn('No available capacity to run containerized <{}>.'.format(task.log_format))
            else:
                if task.supports_isolation():
                    task.controller_node = match.hostname
                else:
                    # project updates and inventory updates don't *actually* run in pods,
                    # so just pick *any* non-isolated, non-containerized host and use it
                    # as the execution node
                    task.execution_node = match.hostname
                    logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
        else:
            task.instance_group = rampart_group
            if instance is not None:
                task.execution_node = instance.hostname
            logger.debug('Submitting {} to <instance group, instance> <{},{}>.'.format(task.log_format, task.instance_group_id, task.execution_node))

    with disable_activity_stream():
        task.celery_task_id = str(uuid.uuid4())
        task.save()

    if rampart_group is not None:
        self.consume_capacity(task, rampart_group.name)

    def post_commit():
        if task.status != 'failed' and type(task) is not WorkflowJob:
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
            )

    task.websocket_emit_status(task.status)  # adds to on_commit
    connection.on_commit(post_commit)
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
    self.start_task_limit -= 1
    if self.start_task_limit == 0:
        # schedule another run immediately after this task manager
        schedule_task_manager()
    from awx.main.tasks import handle_work_error, handle_work_success

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            task.send_notification_templates('running')
            logger.debug('Transitioning %s to running status.', task.log_format)
            schedule_task_manager()
        elif rampart_group.is_container_group:
            # find one real, non-containerized instance with capacity to
            # act as the controller for k8s API interaction
            match = None
            for group in InstanceGroup.objects.filter(is_container_group=False):
                match = group.fit_task_to_most_remaining_capacity_instance(task, group.instances.all())
                if match:
                    break
            task.instance_group = rampart_group
            if match is None:
                logger.warn('No available capacity to run containerized <{}>.'.format(task.log_format))
            elif task.can_run_containerized and any(ig.is_container_group for ig in task.preferred_instance_groups):
                task.controller_node = match.hostname
            else:
                # project updates and inventory updates don't *actually* run in pods, so
                # just pick *any* non-containerized host and use it as the execution node
                task.execution_node = match.hostname
                logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
        else:
            task.instance_group = rampart_group
            if instance is not None:
                task.execution_node = instance.hostname
            logger.debug('Submitting {} to <instance group, instance> <{},{}>.'.format(task.log_format, task.instance_group_id, task.execution_node))

    with disable_activity_stream():
        task.celery_task_id = str(uuid.uuid4())
        task.save()
        task.log_lifecycle("waiting")

    if rampart_group is not None:
        self.consume_capacity(task, rampart_group.name)

    def post_commit():
        if task.status != 'failed' and type(task) is not WorkflowJob:
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
            )

    task.websocket_emit_status(task.status)  # adds to on_commit
    connection.on_commit(post_commit)
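# The container-group branch above walks non-container instance groups looking
# for an instance with enough remaining capacity to control the pod-based job.
# A minimal standalone sketch of that "most remaining capacity" selection rule;
# the capacity/consumed_capacity fields and the impact parameter are assumed
# for illustration, not taken from the code above:
def most_remaining_capacity_instance(instances, impact):
    fits = [i for i in instances if i.capacity - i.consumed_capacity >= impact]
    # prefer the instance with the most headroom; None when nothing fits
    return max(fits, key=lambda i: i.capacity - i.consumed_capacity, default=None)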
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
    from awx.main.tasks import handle_work_error, handle_work_success

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    controller_node = None
    if task.supports_isolation() and rampart_group.controller_id:
        try:
            controller_node = rampart_group.choose_online_controller_node()
        except IndexError:
            logger.debug(six.text_type("No controllers available in group {} to run {}").format(rampart_group.name, task.log_format))
            return

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            logger.info('Transitioning %s to running status.', task.log_format)
            schedule_task_manager()
        elif not task.supports_isolation() and rampart_group.controller_id:
            # non-Ansible jobs on isolated instances run on controller
            task.instance_group = rampart_group.controller
            task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
            logger.info(six.text_type('Submitting isolated {} to queue {} on node {}.').format(task.log_format, task.instance_group.name, task.execution_node))
        elif controller_node:
            task.instance_group = rampart_group
            task.execution_node = instance.hostname
            task.controller_node = controller_node
            logger.info(six.text_type('Submitting isolated {} to queue {} controlled by {}.').format(task.log_format, task.execution_node, controller_node))
        else:
            task.instance_group = rampart_group
            if instance is not None:
                task.execution_node = instance.hostname
            logger.info(six.text_type('Submitting {} to <instance group, instance> <{},{}>.').format(task.log_format, task.instance_group_id, task.execution_node))

    with disable_activity_stream():
        task.celery_task_id = str(uuid.uuid4())
        task.save()

    if rampart_group is not None:
        self.consume_capacity(task, rampart_group.name)

    def post_commit():
        if task.status != 'failed' and type(task) is not WorkflowJob:
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
            )

    task.websocket_emit_status(task.status)  # adds to on_commit
    connection.on_commit(post_commit)