Example 1
 def test_workflow_fails_leaf(self):
     wfj = self.workflow_job(states=['successful', 'successful', 'failed', None, None])
     dag = WorkflowDAG(workflow_job=wfj)
     dag.mark_dnr_nodes()
     is_done = dag.is_workflow_done()
     has_failed, reason = dag.has_workflow_failed()
     self.assertTrue(is_done)
     self.assertTrue(has_failed)
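
A note on the shared harness: self.workflow_job(states=[...]) builds a fixed five-node WorkflowDAG fixture whose exact topology is not reproduced here. The semantics the assertions exercise can be sketched with toy stand-ins (hypothetical classes, not the AWX models): a workflow is done once every node has either finished or been marked do-not-run, and it has failed when a failed node has no error-handling branch.

    # Toy stand-ins for illustration only -- not the real AWX classes.
    class Node:
        def __init__(self, status=None):
            self.status = status        # 'successful', 'failed', or None
            self.do_not_run = False
            self.failure_children = []  # nodes that run on this node's failure

    def is_workflow_done(nodes):
        return all(n.do_not_run or n.status in ('successful', 'failed')
                   for n in nodes)

    def has_workflow_failed(nodes):
        # Same tuple contract the later examples unpack: (has_failed, reason).
        for n in nodes:
            if n.status == 'failed' and not n.failure_children:
                return True, 'failed node has no error handling path'
        return False, None

    # Same shape as test_workflow_fails_leaf: a failed leaf with no
    # failure branch leaves the workflow both done and failed.
    nodes = [Node('successful'), Node('successful'), Node('failed')]
    assert is_workflow_done(nodes)
    assert has_workflow_failed(nodes)[0]
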
Example 2
 def test_workflow_not_finished(self):
     wfj = self.workflow_job(states=['new', None, None, None, None])
     dag = WorkflowDAG(workflow_job=wfj)
     dag.mark_dnr_nodes()
     is_done = dag.is_workflow_done()
     has_failed, reason = dag.has_workflow_failed()
     self.assertFalse(is_done)
     self.assertFalse(has_failed)
     assert reason is None
Example 3
 def test_workflow_dnr_because_parent(self, workflow_job_fn):
     wfj, nodes = workflow_job_fn(states=['successful', None, None, None, None, None,])
     dag = WorkflowDAG(workflow_job=wfj)
     workflow_nodes = dag.mark_dnr_nodes()
     assert 2 == len(workflow_nodes)
     assert nodes[3] in workflow_nodes
     assert nodes[4] in workflow_nodes
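
The expectation of two marked nodes reflects how do-not-run propagates: once node 0 succeeds, anything reachable only through its failure edge can never fire. A hedged sketch of that propagation over the same kind of toy nodes, using a simplified single-parent model (the real mark_dnr_nodes must also handle convergence nodes with multiple parents):

    from collections import deque

    class Node:
        def __init__(self, status=None):
            self.status = status
            self.do_not_run = False
            self.success_children = []
            self.failure_children = []

    def mark_dnr_nodes(root):
        # A successful node kills its failure branch; a failed node
        # kills its success branch. Everything below a dead branch is
        # marked do-not-run.
        queue, marked = deque(), []
        if root.status == 'successful':
            queue.extend(root.failure_children)
        elif root.status == 'failed':
            queue.extend(root.success_children)
        while queue:
            node = queue.popleft()
            if not node.do_not_run:
                node.do_not_run = True
                marked.append(node)
                queue.extend(node.success_children + node.failure_children)
        return marked

    # Node 0 succeeded, so the two nodes on its failure branch are dead.
    root, a, b = Node('successful'), Node(), Node()
    root.failure_children = [a]
    a.failure_children = [b]
    assert len(mark_dnr_nodes(root)) == 2
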
Example 4
 def process_finished_workflow_jobs(self, workflow_jobs):
     result = []
     for workflow_job in workflow_jobs:
         dag = WorkflowDAG(workflow_job)
         status_changed = False
         if workflow_job.cancel_flag:
             workflow_job.workflow_nodes.filter(
                 do_not_run=False, job__isnull=True).update(do_not_run=True)
             logger.debug(
                 'Canceling spawned jobs of %s due to cancel flag.',
                 workflow_job.log_format)
             cancel_finished = dag.cancel_node_jobs()
             if cancel_finished:
                 logger.info(
                     'Marking %s as canceled, all spawned jobs have concluded.',
                     workflow_job.log_format)
                 workflow_job.status = 'canceled'
                 workflow_job.start_args = ''  # blank field to remove encrypted passwords
                 workflow_job.save(update_fields=['status', 'start_args'])
                 status_changed = True
         else:
             workflow_nodes = dag.mark_dnr_nodes()
             for n in workflow_nodes:
                 n.save(update_fields=['do_not_run'])
             is_done = dag.is_workflow_done()
             if not is_done:
                 continue
             has_failed, reason = dag.has_workflow_failed()
             logger.debug('Marking %s as %s.', workflow_job.log_format,
                          'failed' if has_failed else 'successful')
             result.append(workflow_job.id)
             new_status = 'failed' if has_failed else 'successful'
             logger.debug("Transitioning {} to {} status.".format(
                 workflow_job.log_format, new_status))
             update_fields = ['status', 'start_args']
             workflow_job.status = new_status
             if reason:
                 logger.info(
                     f'Workflow job {workflow_job.id} failed due to reason: {reason}'
                 )
                 workflow_job.job_explanation = gettext_noop(
                     "No error handling paths found, marking workflow as failed"
                 )
                 update_fields.append('job_explanation')
             workflow_job.start_args = ''  # blank field to remove encrypted passwords
             workflow_job.save(update_fields=update_fields)
             status_changed = True
         if status_changed:
             workflow_job.websocket_emit_status(workflow_job.status)
             # Operations whose queries rely on modifications made during the atomic scheduling session
             workflow_job.send_notification_templates(
                 'succeeded' if workflow_job.status == 'successful' else 'failed')
             if workflow_job.spawned_by_workflow:
                 schedule_task_manager()
     return result
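
Two design choices in this method recur across all of its variants: start_args is blanked before every save because launch arguments can contain encrypted passwords (the inline comments state this directly), and update_fields keeps each UPDATE narrowed to the columns that actually changed, with job_explanation appended to the list only when a failure reason was produced.
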
Example 5
    def test_workflow_done(self):
        wfj = self.workflow_job(states=['failed', None, None, 'successful', None])
        dag = WorkflowDAG(workflow_job=wfj)
        assert 3 == len(dag.mark_dnr_nodes())
        is_done = dag.is_workflow_done()
        has_failed, reason = dag.has_workflow_failed()
        self.assertTrue(is_done)
        self.assertFalse(has_failed)
        assert reason is None

        # verify that relaunched WFJ fails if a JT leaf is deleted
        for jt in JobTemplate.objects.all():
            jt.delete()
        relaunched = wfj.create_relaunch_workflow_job()
        dag = WorkflowDAG(workflow_job=relaunched)
        dag.mark_dnr_nodes()
        is_done = dag.is_workflow_done()
        has_failed, reason = dag.has_workflow_failed()
        self.assertTrue(is_done)
        self.assertTrue(has_failed)
        assert "Workflow job node {} related unified job template missing".format(wfj.workflow_nodes.all()[0].id)
Example 6
 def process_finished_workflow_jobs(self, workflow_jobs):
     result = []
     for workflow_job in workflow_jobs:
         dag = WorkflowDAG(workflow_job)
         status_changed = False
         if workflow_job.cancel_flag:
             workflow_job.workflow_nodes.filter(
                 do_not_run=False, job__isnull=True).update(do_not_run=True)
             logger.debug(
                 'Canceling spawned jobs of %s due to cancel flag.',
                 workflow_job.log_format)
             cancel_finished = dag.cancel_node_jobs()
             if cancel_finished:
                 logger.info(
                     'Marking %s as canceled, all spawned jobs have concluded.',
                     workflow_job.log_format)
                 workflow_job.status = 'canceled'
                 workflow_job.start_args = ''  # blank field to remove encrypted passwords
                 workflow_job.save(update_fields=['status', 'start_args'])
                 status_changed = True
         else:
             workflow_nodes = dag.mark_dnr_nodes()
             for n in workflow_nodes:
                 n.save(update_fields=['do_not_run'])
             is_done = dag.is_workflow_done()
             if not is_done:
                 continue
             has_failed, reason = dag.has_workflow_failed()
             logger.info('Marking %s as %s.', workflow_job.log_format,
                         'failed' if has_failed else 'successful')
             result.append(workflow_job.id)
             new_status = 'failed' if has_failed else 'successful'
             logger.debug(
                 six.text_type("Transitioning {} to {} status.").format(
                     workflow_job.log_format, new_status))
             update_fields = ['status', 'start_args']
             workflow_job.status = new_status
             if reason:
                 logger.info(reason)
                 workflow_job.job_explanation = "No error handling paths found, marking workflow as failed"
                 update_fields.append('job_explanation')
             workflow_job.start_args = ''  # blank field to remove encrypted passwords
             workflow_job.save(update_fields=update_fields)
             status_changed = True
         if status_changed:
             workflow_job.websocket_emit_status(workflow_job.status)
             if workflow_job.spawned_by_workflow:
                 schedule_task_manager()
     return result
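
This variant saves the DNR nodes with an explicit loop rather than map(): in Python 3, map() returns a lazy iterator, so side effects inside the callable never execute unless something consumes the result. A quick self-contained demonstration:

    saved = []
    map(lambda n: saved.append(n), [1, 2, 3])  # lazy: the lambda never runs
    print(saved)  # []

    for n in [1, 2, 3]:  # an explicit loop actually performs the work
        saved.append(n)
    print(saved)  # [1, 2, 3]
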
Example 7
    def spawn_workflow_graph_jobs(self):
        result = []
        for workflow_job in self.all_tasks:
            if self.timed_out():
                logger.warning(
                    "Workflow manager has reached time out while processing running workflows, exiting loop early"
                )
                ScheduleWorkflowManager().schedule()
                # Do not process any more workflow jobs; stop here.
                # Another WorkflowManager run has already been scheduled above.
                break
            dag = WorkflowDAG(workflow_job)
            status_changed = False
            if workflow_job.cancel_flag:
                workflow_job.workflow_nodes.filter(
                    do_not_run=False, job__isnull=True).update(do_not_run=True)
                logger.debug(
                    'Canceling spawned jobs of %s due to cancel flag.',
                    workflow_job.log_format)
                cancel_finished = dag.cancel_node_jobs()
                if cancel_finished:
                    logger.info(
                        'Marking %s as canceled, all spawned jobs have concluded.',
                        workflow_job.log_format)
                    workflow_job.status = 'canceled'
                    workflow_job.start_args = ''  # blank field to remove encrypted passwords
                    workflow_job.save(update_fields=['status', 'start_args'])
                    status_changed = True
            else:
                workflow_nodes = dag.mark_dnr_nodes()
                WorkflowJobNode.objects.bulk_update(workflow_nodes,
                                                    ['do_not_run'])
                # If workflow is now done, we do special things to mark it as done.
                is_done = dag.is_workflow_done()
                if is_done:
                    has_failed, reason = dag.has_workflow_failed()
                    logger.debug('Marking %s as %s.', workflow_job.log_format,
                                 'failed' if has_failed else 'successful')
                    result.append(workflow_job.id)
                    new_status = 'failed' if has_failed else 'successful'
                    logger.debug("Transitioning {} to {} status.".format(
                        workflow_job.log_format, new_status))
                    update_fields = ['status', 'start_args']
                    workflow_job.status = new_status
                    if reason:
                        logger.info(
                            f'Workflow job {workflow_job.id} failed due to reason: {reason}'
                        )
                        workflow_job.job_explanation = gettext_noop(
                            "No error handling paths found, marking workflow as failed"
                        )
                        update_fields.append('job_explanation')
                    workflow_job.start_args = ''  # blank field to remove encrypted passwords
                    workflow_job.save(update_fields=update_fields)
                    status_changed = True

            if status_changed:
                if workflow_job.spawned_by_workflow:
                    ScheduleWorkflowManager().schedule()
                workflow_job.websocket_emit_status(workflow_job.status)
                # Operations whose queries rely on modifications made during the atomic scheduling session
                workflow_job.send_notification_templates(
                    'succeeded' if workflow_job.status == 'successful' else 'failed')

            if workflow_job.status == 'running':
                spawn_nodes = dag.bfs_nodes_to_run()
                if spawn_nodes:
                    logger.debug('Spawning jobs for %s',
                                 workflow_job.log_format)
                else:
                    logger.debug('No nodes to spawn for %s',
                                 workflow_job.log_format)
                for spawn_node in spawn_nodes:
                    if spawn_node.unified_job_template is None:
                        continue
                    kv = spawn_node.get_job_kwargs()
                    job = spawn_node.unified_job_template.create_unified_job(
                        **kv)
                    spawn_node.job = job
                    spawn_node.save()
                    logger.debug('Spawned %s in %s for node %s',
                                 job.log_format, workflow_job.log_format,
                                 spawn_node.pk)
                    can_start = True
                    if isinstance(spawn_node.unified_job_template,
                                  WorkflowJobTemplate):
                        workflow_ancestors = job.get_ancestor_workflows()
                        if spawn_node.unified_job_template in set(
                                workflow_ancestors):
                            can_start = False
                            logger.info(
                                'Refusing to start recursive workflow-in-workflow id={}, wfjt={}, ancestors={}'
                                .format(job.id,
                                        spawn_node.unified_job_template.pk,
                                        [wa.pk for wa in workflow_ancestors]))
                            display_list = [spawn_node.unified_job_template] + workflow_ancestors
                            job.job_explanation = gettext_noop(
                                "Workflow Job spawned from workflow could not start because it "
                                "would result in recursion (spawn order, most recent first: {})"
                            ).format(', '.join('<{}>'.format(tmp)
                                               for tmp in display_list))
                        else:
                            logger.debug(
                                'Starting workflow-in-workflow id={}, wfjt={}, ancestors={}'
                                .format(job.id,
                                        spawn_node.unified_job_template.pk,
                                        [wa.pk for wa in workflow_ancestors]))
                    if not job._resources_sufficient_for_launch():
                        can_start = False
                        job.job_explanation = gettext_noop(
                            "Job spawned from workflow could not start because it was missing a related resource such as project or inventory"
                        )
                    if can_start:
                        if workflow_job.start_args:
                            start_args = json.loads(
                                decrypt_field(workflow_job, 'start_args'))
                        else:
                            start_args = {}
                        can_start = job.signal_start(**start_args)
                        if not can_start:
                            job.job_explanation = gettext_noop(
                                "Job spawned from workflow could not start because it was not in the right state or required manual credentials"
                            )
                    if not can_start:
                        job.status = 'failed'
                        job.save(update_fields=['status', 'job_explanation'])
                        job.websocket_emit_status('failed')

                    # TODO: should we emit a status on the socket here similar to tasks.py awx_periodic_scheduler() ?
                    # emit_websocket_notification('/socket.io/jobs', '', dict(id=))

        return result
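
The workflow-in-workflow guard above refuses to start a spawned workflow whose template already appears among the ancestors of the job being spawned, because starting it would recurse indefinitely. A hedged, self-contained sketch of that ancestor walk (the stub class and spawned_by link are stand-ins for the real relationships, not the AWX API):

    # Hypothetical stand-ins: each spawned workflow job remembers its
    # template and the workflow job that spawned it.
    class WorkflowJobStub:
        def __init__(self, template, spawned_by=None):
            self.template = template
            self.spawned_by = spawned_by

        def get_ancestor_templates(self):
            ancestors, current = [], self.spawned_by
            while current is not None:
                ancestors.append(current.template)
                current = current.spawned_by
            return ancestors

    def can_spawn(template, parent_job):
        # Refuse templates already on the spawn chain: starting one
        # again would re-enter the same workflow forever.
        chain = set(parent_job.get_ancestor_templates())
        chain.add(parent_job.template)
        return template not in chain

    outer = WorkflowJobStub('wfjt-A')
    inner = WorkflowJobStub('wfjt-B', spawned_by=outer)
    assert can_spawn('wfjt-C', inner)
    assert not can_spawn('wfjt-A', inner)  # would recurse into wfjt-A
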