Example #1
 def approve(self, request=None):
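     # Mark the approval as successful, persist it, send the approval notification,
     # emit the new status over websockets, wake the task manager so the workflow
     # can advance, and return the API URL for this approval action.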
     self.status = 'successful'
     self.save()
     self.send_approval_notification('approved')
     self.websocket_emit_status(self.status)
     schedule_task_manager()
     return reverse('api:workflow_approval_approve', kwargs={'pk': self.pk}, request=request)
Example #2
 def deny(self, request=None):
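     # Mirror of approve(): mark the approval as failed, send the denial
     # notification, emit the status, wake the task manager, and return the
     # API URL for the deny action.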
     self.status = 'failed'
     self.save()
     self.send_approval_notification('denied')
     self.websocket_emit_status(self.status)
     schedule_task_manager()
     return reverse('api:workflow_approval_deny', kwargs={'pk': self.pk}, request=request)
Example #3
 def process_finished_workflow_jobs(self, workflow_jobs):
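     # Finalize workflow jobs whose spawned jobs have finished: cancel flagged
     # workflows once their nodes conclude, otherwise mark them failed or
     # successful, and return the ids of the jobs given a terminal pass/fail status.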
     result = []
     for workflow_job in workflow_jobs:
         dag = WorkflowDAG(workflow_job)
         status_changed = False
         if workflow_job.cancel_flag:
             logger.debug('Canceling spawned jobs of %s due to cancel flag.', workflow_job.log_format)
             cancel_finished = dag.cancel_node_jobs()
             if cancel_finished:
                 logger.info('Marking %s as canceled, all spawned jobs have concluded.', workflow_job.log_format)
                 workflow_job.status = 'canceled'
                 workflow_job.start_args = ''  # blank field to remove encrypted passwords
                 workflow_job.save(update_fields=['status', 'start_args'])
                 status_changed = True
         else:
             is_done, has_failed = dag.is_workflow_done()
             if not is_done:
                 continue
             logger.info('Marking %s as %s.', workflow_job.log_format, 'failed' if has_failed else 'successful')
             result.append(workflow_job.id)
             new_status = 'failed' if has_failed else 'successful'
             logger.debug(six.text_type("Transitioning {} to {} status.").format(workflow_job.log_format, new_status))
             workflow_job.status = new_status
             workflow_job.start_args = ''  # blank field to remove encrypted passwords
             workflow_job.save(update_fields=['status', 'start_args'])
             status_changed = True
         if status_changed:
             workflow_job.websocket_emit_status(workflow_job.status)
             if workflow_job.spawned_by_workflow:
                 schedule_task_manager()
     return result
Example #4
    def start_task(self, task, instance_group, dependent_tasks=None, instance=None):
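        # Hand a ready task to the dispatcher: mark it 'waiting', run its pre-start
        # checks, record the pre-selected instance group (control/execution nodes
        # are already chosen here), and enqueue it after the transaction commits.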
        self.subsystem_metrics.inc("task_manager_tasks_started", 1)
        self.start_task_limit -= 1
        if self.start_task_limit == 0:
            # schedule another run immediately after this task manager
            schedule_task_manager()
        from awx.main.tasks.system import handle_work_error, handle_work_success

        dependent_tasks = dependent_tasks or []

        task_actual = {
            'type': get_type_for_model(type(task)),
            'id': task.id,
        }
        dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

        task.status = 'waiting'

        (start_status, opts) = task.pre_start()
        if not start_status:
            task.status = 'failed'
            if task.job_explanation:
                task.job_explanation += ' '
            task.job_explanation += 'Task failed pre-start check.'
            task.save()
            # TODO: run error handler to fail sub-tasks and send notifications
        else:
            if type(task) is WorkflowJob:
                task.status = 'running'
                task.send_notification_templates('running')
                logger.debug('Transitioning %s to running status.', task.log_format)
                schedule_task_manager()
            # at this point we already have control/execution nodes selected for the following cases
            else:
                task.instance_group = instance_group
                execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
                logger.debug(
                    f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {instance_group.name}{execution_node_msg}.'
                )
            with disable_activity_stream():
                task.celery_task_id = str(uuid.uuid4())
                task.save()
                task.log_lifecycle("waiting")

        def post_commit():
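            # Runs via connection.on_commit(): dispatch the task to its queue only
            # after the transaction commits, skipping failed tasks and workflow jobs.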
            if task.status != 'failed' and type(task) is not WorkflowJob:
                # Before task is dispatched, ensure that job_event partitions exist
                create_partition(task.event_class._meta.db_table, start=task.created)
                task_cls = task._get_task_class()
                task_cls.apply_async(
                    [task.pk],
                    opts,
                    queue=task.get_queue_name(),
                    uuid=task.celery_task_id,
                    callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                    errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
                )

        task.websocket_emit_status(task.status)  # adds to on_commit
        connection.on_commit(post_commit)
Example #5
 def process_finished_workflow_jobs(self, workflow_jobs):
     result = []
     for workflow_job in workflow_jobs:
         dag = WorkflowDAG(workflow_job)
         status_changed = False
         if workflow_job.cancel_flag:
             workflow_job.workflow_nodes.filter(
                 do_not_run=False, job__isnull=True).update(do_not_run=True)
             logger.debug(
                 'Canceling spawned jobs of %s due to cancel flag.',
                 workflow_job.log_format)
             cancel_finished = dag.cancel_node_jobs()
             if cancel_finished:
                 logger.info(
                     'Marking %s as canceled, all spawned jobs have concluded.',
                     workflow_job.log_format)
                 workflow_job.status = 'canceled'
                 workflow_job.start_args = ''  # blank field to remove encrypted passwords
                 workflow_job.save(update_fields=['status', 'start_args'])
                 status_changed = True
         else:
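             # Propagate do-not-run markings from the DAG before deciding whether
             # the workflow is complete and whether it has failed.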
             workflow_nodes = dag.mark_dnr_nodes()
             for n in workflow_nodes:
                 n.save(update_fields=['do_not_run'])
             is_done = dag.is_workflow_done()
             if not is_done:
                 continue
             has_failed, reason = dag.has_workflow_failed()
             logger.debug('Marking %s as %s.', workflow_job.log_format,
                          'failed' if has_failed else 'successful')
             result.append(workflow_job.id)
             new_status = 'failed' if has_failed else 'successful'
             logger.debug("Transitioning {} to {} status.".format(
                 workflow_job.log_format, new_status))
             update_fields = ['status', 'start_args']
             workflow_job.status = new_status
             if reason:
                 logger.info(
                     f'Workflow job {workflow_job.id} failed due to reason: {reason}'
                 )
                 workflow_job.job_explanation = gettext_noop(
                     "No error handling paths found, marking workflow as failed"
                 )
                 update_fields.append('job_explanation')
             workflow_job.start_args = ''  # blank field to remove encrypted passwords
             workflow_job.save(update_fields=update_fields)
             status_changed = True
         if status_changed:
             workflow_job.websocket_emit_status(workflow_job.status)
             # Operations whose queries rely on modifications made during the atomic scheduling session
             workflow_job.send_notification_templates(
                 'succeeded' if workflow_job.status ==
                 'successful' else 'failed')
             if workflow_job.spawned_by_workflow:
                 schedule_task_manager()
     return result
Example #6
 def process_finished_workflow_jobs(self, workflow_jobs):
     result = []
     for workflow_job in workflow_jobs:
         dag = WorkflowDAG(workflow_job)
         status_changed = False
         if workflow_job.cancel_flag:
             workflow_job.workflow_nodes.filter(
                 do_not_run=False, job__isnull=True).update(do_not_run=True)
             logger.debug(
                 'Canceling spawned jobs of %s due to cancel flag.',
                 workflow_job.log_format)
             cancel_finished = dag.cancel_node_jobs()
             if cancel_finished:
                 logger.info(
                     'Marking %s as canceled, all spawned jobs have concluded.',
                     workflow_job.log_format)
                 workflow_job.status = 'canceled'
                 workflow_job.start_args = ''  # blank field to remove encrypted passwords
                 workflow_job.save(update_fields=['status', 'start_args'])
                 status_changed = True
         else:
             workflow_nodes = dag.mark_dnr_nodes()
             for n in workflow_nodes:
                 n.save(update_fields=['do_not_run'])
             is_done = dag.is_workflow_done()
             if not is_done:
                 continue
             has_failed, reason = dag.has_workflow_failed()
             logger.info('Marking %s as %s.', workflow_job.log_format,
                         'failed' if has_failed else 'successful')
             result.append(workflow_job.id)
             new_status = 'failed' if has_failed else 'successful'
             logger.debug(
                 six.text_type("Transitioning {} to {} status.").format(
                     workflow_job.log_format, new_status))
             update_fields = ['status', 'start_args']
             workflow_job.status = new_status
             if reason:
                 logger.info(reason)
                 workflow_job.job_explanation = "No error handling paths found, marking workflow as failed"
                 update_fields.append('job_explanation')
             workflow_job.start_args = ''  # blank field to remove encrypted passwords
             workflow_job.save(update_fields=update_fields)
             status_changed = True
         if status_changed:
             workflow_job.websocket_emit_status(workflow_job.status)
             if workflow_job.spawned_by_workflow:
                 schedule_task_manager()
     return result
Example #7
    def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
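        # Start a task; for container groups, pick an online control plane node to
        # drive the pod (or to act as the execution node for project updates and
        # system jobs), then dispatch the task after the transaction commits.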
        self.start_task_limit -= 1
        if self.start_task_limit == 0:
            # schedule another run immediately after this task manager
            schedule_task_manager()
        from awx.main.tasks import handle_work_error, handle_work_success

        dependent_tasks = dependent_tasks or []

        task_actual = {
            'type': get_type_for_model(type(task)),
            'id': task.id,
        }
        dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

        task.status = 'waiting'

        (start_status, opts) = task.pre_start()
        if not start_status:
            task.status = 'failed'
            if task.job_explanation:
                task.job_explanation += ' '
            task.job_explanation += 'Task failed pre-start check.'
            task.save()
            # TODO: run error handler to fail sub-tasks and send notifications
        else:
            if type(task) is WorkflowJob:
                task.status = 'running'
                task.send_notification_templates('running')
                logger.debug('Transitioning %s to running status.', task.log_format)
                schedule_task_manager()
            elif rampart_group.is_container_group:
                task.instance_group = rampart_group
                if task.capacity_type == 'execution':
                    # find one real, non-containerized instance with capacity to
                    # act as the controller for k8s API interaction
                    try:
                        task.controller_node = Instance.choose_online_control_plane_node()
                        task.log_lifecycle("controller_node_chosen")
                    except IndexError:
                        logger.warning("No control plane nodes available to run containerized job {}".format(task.log_format))
                        return
                else:
                    # project updates and system jobs don't *actually* run in pods, so
                    # just pick *any* non-containerized host and use it as the execution node
                    task.execution_node = Instance.choose_online_control_plane_node()
                    task.log_lifecycle("execution_node_chosen")
                    logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
            else:
                task.instance_group = rampart_group
                task.execution_node = instance.hostname
                task.log_lifecycle("execution_node_chosen")
                if instance.node_type == 'execution':
                    try:
                        task.controller_node = Instance.choose_online_control_plane_node()
                        task.log_lifecycle("controller_node_chosen")
                    except IndexError:
                        logger.warning("No control plane nodes available to manage {}".format(task.log_format))
                        return
                else:
                    # control plane nodes will manage jobs locally for performance and resilience
                    task.controller_node = task.execution_node
                    task.log_lifecycle("controller_node_chosen")
                logger.debug('Submitting job {} to queue {} controlled by {}.'.format(task.log_format, task.execution_node, task.controller_node))
            with disable_activity_stream():
                task.celery_task_id = str(uuid.uuid4())
                task.save()
                task.log_lifecycle("waiting")

            if rampart_group is not None:
                self.consume_capacity(task, rampart_group.name, instance=instance)

        def post_commit():
            if task.status != 'failed' and type(task) is not WorkflowJob:
                # Before task is dispatched, ensure that job_event partitions exist
                create_partition(task.event_class._meta.db_table, start=task.created)
                task_cls = task._get_task_class()
                task_cls.apply_async(
                    [task.pk],
                    opts,
                    queue=task.get_queue_name(),
                    uuid=task.celery_task_id,
                    callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                    errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
                )

        task.websocket_emit_status(task.status)  # adds to on_commit
        connection.on_commit(post_commit)
Example #8
    def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
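        # Start a task; isolated instance groups require an online controller node
        # chosen up front, and container groups borrow a non-containerized instance
        # with spare capacity as their controller.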
        from awx.main.tasks import handle_work_error, handle_work_success

        dependent_tasks = dependent_tasks or []

        task_actual = {
            'type': get_type_for_model(type(task)),
            'id': task.id,
        }
        dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

        controller_node = None
        if task.supports_isolation() and rampart_group.controller_id:
            try:
                controller_node = rampart_group.choose_online_controller_node()
            except IndexError:
                logger.debug("No controllers available in group {} to run {}".format(
                             rampart_group.name, task.log_format))
                return

        task.status = 'waiting'

        (start_status, opts) = task.pre_start()
        if not start_status:
            task.status = 'failed'
            if task.job_explanation:
                task.job_explanation += ' '
            task.job_explanation += 'Task failed pre-start check.'
            task.save()
            # TODO: run error handler to fail sub-tasks and send notifications
        else:
            if type(task) is WorkflowJob:
                task.status = 'running'
                task.send_notification_templates('running')
                logger.debug('Transitioning %s to running status.', task.log_format)
                schedule_task_manager()
            elif not task.supports_isolation() and rampart_group.controller_id:
                # non-Ansible jobs on isolated instances run on controller
                task.instance_group = rampart_group.controller
                task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True)))
                logger.debug('Submitting isolated {} to queue {} on node {}.'.format(
                             task.log_format, task.instance_group.name, task.execution_node))
            elif controller_node:
                task.instance_group = rampart_group
                task.execution_node = instance.hostname
                task.controller_node = controller_node
                logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format(
                             task.log_format, task.execution_node, controller_node))
            elif rampart_group.is_containerized:
                # find one real, non-containerized instance with capacity to
                # act as the controller for k8s API interaction
                match = None
                for group in InstanceGroup.objects.all():
                    if group.is_containerized or group.controller_id:
                        continue
                    match = group.fit_task_to_most_remaining_capacity_instance(task)
                    if match:
                        break
                task.instance_group = rampart_group
                if match is None:
                    logger.warn(
                        'No available capacity to run containerized <{}>.'.format(task.log_format)
                    )
                else:
                    if task.supports_isolation():
                        task.controller_node = match.hostname
                    else:
                        # project updates and inventory updates don't *actually* run in pods,
                        # so just pick *any* non-isolated, non-containerized host and use it
                        # as the execution node
                        task.execution_node = match.hostname
                        logger.debug('Submitting containerized {} to queue {}.'.format(
                                     task.log_format, task.execution_node))
            else:
                task.instance_group = rampart_group
                if instance is not None:
                    task.execution_node = instance.hostname
                logger.debug('Submitting {} to <instance group, instance> <{},{}>.'.format(
                             task.log_format, task.instance_group_id, task.execution_node))
            with disable_activity_stream():
                task.celery_task_id = str(uuid.uuid4())
                task.save()

            if rampart_group is not None:
                self.consume_capacity(task, rampart_group.name)

        def post_commit():
            if task.status != 'failed' and type(task) is not WorkflowJob:
                task_cls = task._get_task_class()
                task_cls.apply_async(
                    [task.pk],
                    opts,
                    queue=task.get_queue_name(),
                    uuid=task.celery_task_id,
                    callbacks=[{
                        'task': handle_work_success.name,
                        'kwargs': {'task_actual': task_actual}
                    }],
                    errbacks=[{
                        'task': handle_work_error.name,
                        'args': [task.celery_task_id],
                        'kwargs': {'subtasks': [task_actual] + dependencies}
                    }],
                )

        task.websocket_emit_status(task.status)  # adds to on_commit
        connection.on_commit(post_commit)
Example #9
    def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
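        # Start a task; container groups are assigned a non-containerized instance
        # with remaining capacity as controller, or as execution node for project
        # and inventory updates that do not actually run in pods.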
        self.start_task_limit -= 1
        if self.start_task_limit == 0:
            # schedule another run immediately after this task manager
            schedule_task_manager()
        from awx.main.tasks import handle_work_error, handle_work_success

        dependent_tasks = dependent_tasks or []

        task_actual = {
            'type': get_type_for_model(type(task)),
            'id': task.id,
        }
        dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

        task.status = 'waiting'

        (start_status, opts) = task.pre_start()
        if not start_status:
            task.status = 'failed'
            if task.job_explanation:
                task.job_explanation += ' '
            task.job_explanation += 'Task failed pre-start check.'
            task.save()
            # TODO: run error handler to fail sub-tasks and send notifications
        else:
            if type(task) is WorkflowJob:
                task.status = 'running'
                task.send_notification_templates('running')
                logger.debug('Transitioning %s to running status.', task.log_format)
                schedule_task_manager()
            elif rampart_group.is_container_group:
                # find one real, non-containerized instance with capacity to
                # act as the controller for k8s API interaction
                match = None
                for group in InstanceGroup.objects.filter(is_container_group=False):
                    match = group.fit_task_to_most_remaining_capacity_instance(task, group.instances.all())
                    if match:
                        break
                task.instance_group = rampart_group
                if match is None:
                    logger.warn('No available capacity to run containerized <{}>.'.format(task.log_format))
                elif task.can_run_containerized and any(ig.is_container_group for ig in task.preferred_instance_groups):
                    task.controller_node = match.hostname
                else:
                    # project updates and inventory updates don't *actually* run in pods, so
                    # just pick *any* non-containerized host and use it as the execution node
                    task.execution_node = match.hostname
                    logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
            else:
                task.instance_group = rampart_group
                if instance is not None:
                    task.execution_node = instance.hostname
                logger.debug('Submitting {} to <instance group, instance> <{},{}>.'.format(task.log_format, task.instance_group_id, task.execution_node))
            with disable_activity_stream():
                task.celery_task_id = str(uuid.uuid4())
                task.save()
                task.log_lifecycle("waiting")

            if rampart_group is not None:
                self.consume_capacity(task, rampart_group.name)

        def post_commit():
            if task.status != 'failed' and type(task) is not WorkflowJob:
                task_cls = task._get_task_class()
                task_cls.apply_async(
                    [task.pk],
                    opts,
                    queue=task.get_queue_name(),
                    uuid=task.celery_task_id,
                    callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                    errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
                )

        task.websocket_emit_status(task.status)  # adds to on_commit
        connection.on_commit(post_commit)
Example #10
    def start_task(self,
                   task,
                   rampart_group,
                   dependent_tasks=None,
                   instance=None):
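        # Start a task on a regular or isolated instance group: mark it 'waiting'
        # after pre-start checks, record its instance group and execution node,
        # and dispatch it once the transaction commits.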
        from awx.main.tasks import handle_work_error, handle_work_success

        dependent_tasks = dependent_tasks or []

        task_actual = {
            'type': get_type_for_model(type(task)),
            'id': task.id,
        }
        dependencies = [{
            'type': get_type_for_model(type(t)),
            'id': t.id
        } for t in dependent_tasks]

        controller_node = None
        if task.supports_isolation() and rampart_group.controller_id:
            try:
                controller_node = rampart_group.choose_online_controller_node()
            except IndexError:
                logger.debug(
                    six.text_type(
                        "No controllers available in group {} to run {}").
                    format(rampart_group.name, task.log_format))
                return

        task.status = 'waiting'

        (start_status, opts) = task.pre_start()
        if not start_status:
            task.status = 'failed'
            if task.job_explanation:
                task.job_explanation += ' '
            task.job_explanation += 'Task failed pre-start check.'
            task.save()
            # TODO: run error handler to fail sub-tasks and send notifications
        else:
            if type(task) is WorkflowJob:
                task.status = 'running'
                logger.info('Transitioning %s to running status.',
                            task.log_format)
                schedule_task_manager()
            elif not task.supports_isolation() and rampart_group.controller_id:
                # non-Ansible jobs on isolated instances run on controller
                task.instance_group = rampart_group.controller
                task.execution_node = random.choice(
                    list(rampart_group.controller.instances.all().values_list(
                        'hostname', flat=True)))
                logger.info(
                    six.text_type(
                        'Submitting isolated {} to queue {} on node {}.').format(
                            task.log_format, task.instance_group.name,
                            task.execution_node))
            elif controller_node:
                task.instance_group = rampart_group
                task.execution_node = instance.hostname
                task.controller_node = controller_node
                logger.info(
                    six.text_type(
                        'Submitting isolated {} to queue {} controlled by {}.'
                    ).format(task.log_format, task.execution_node,
                             controller_node))
            else:
                task.instance_group = rampart_group
                if instance is not None:
                    task.execution_node = instance.hostname
                logger.info(
                    six.text_type(
                        'Submitting {} to <instance group, instance> <{},{}>.'
                    ).format(task.log_format, task.instance_group_id,
                             task.execution_node))
            with disable_activity_stream():
                task.celery_task_id = str(uuid.uuid4())
                task.save()

            if rampart_group is not None:
                self.consume_capacity(task, rampart_group.name)

        def post_commit():
            if task.status != 'failed' and type(task) is not WorkflowJob:
                task_cls = task._get_task_class()
                task_cls.apply_async(
                    [task.pk],
                    opts,
                    queue=task.get_queue_name(),
                    uuid=task.celery_task_id,
                    callbacks=[{
                        'task': handle_work_success.name,
                        'kwargs': {
                            'task_actual': task_actual
                        }
                    }],
                    errbacks=[{
                        'task': handle_work_error.name,
                        'args': [task.celery_task_id],
                        'kwargs': {
                            'subtasks': [task_actual] + dependencies
                        }
                    }],
                )

        task.websocket_emit_status(task.status)  # adds to on_commit
        connection.on_commit(post_commit)