def wrapper(*args, **kwargs):
    module = args[0]
    try:
        module._validate_job_ctx()
        module.job_log_util = JobLogUtils(
            sandesh_instance_id=str(uuid.uuid4()),
            config_args=json.dumps(module.job_ctx['config_args']))
        function(*args, **kwargs)
    except ValueError as verr:
        module.results['msg'] = str(verr)
        module.results['failed'] = True
        module.logger.error(str(verr))
    except Exception as ex:
        msg = ("Failed object log due to error: %s\n\t"
               "job name: %s\n\t"
               "job execution id: %s\n" %
               (str(ex), module.job_ctx['job_template_fqname'],
                module.job_ctx['job_execution_id']))
        module.results['msg'] = msg
        module.results['failed'] = True
        module.logger.error(msg)
    finally:
        try:
            # job_log_util may never have been set if validation failed early
            if getattr(module, 'job_log_util', None):
                sandesh_util = SandeshUtils(
                    module.job_log_util.get_config_logger())
                sandesh_util.close_sandesh_connection()
        except Exception as ex:
            module.logger.error("Unable to close sandesh connection: %s",
                                str(ex))
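
The wrapper above is the inner function of a decorator whose outer definition is not shown; a minimal sketch of that outer shape, with handle_sandesh_logging as a hypothetical name:

import functools

def handle_sandesh_logging(function):
    # hypothetical decorator name: the wrapper above closes over
    # `function`, so the enclosing definition must look roughly like this
    @functools.wraps(function)
    def wrapper(*args, **kwargs):
        ...  # body exactly as in the snippet above
    return wrapper
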
Example #2
    def initialize_sandesh_logger(self, config_args, sandesh=True,
                                  sandesh_instance=None):
        # parse the logger args
        args = self.parse_logger_args(config_args)
        args.random_collectors = args.collectors
        if args.collectors:
            args.random_collectors = random.sample(args.collectors,
                                                   len(args.collectors))
            self.args = args
        # initialize logger
        logger = JobLogger(args=args,
                           sandesh_instance_id=self.sandesh_instance_id,
                           sandesh_instance=sandesh_instance)
        if not sandesh_instance and sandesh:
            try:
                sandesh_util = SandeshUtils(logger)
                sandesh_util.wait_for_connection_establish()
            except JobException:
                msg = MsgBundle.getMessage(
                    MsgBundle.SANDESH_INITIALIZATION_TIMEOUT_ERROR)
                raise JobException(msg)
            logger.info("Sandesh is initialized."
                        " Config logger instance created.")

        return logger
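
A hedged usage sketch for initialize_sandesh_logger, assuming (as the JobLogUtils call in the first snippet suggests) that config_args is a JSON string of logger settings; the collector address and the log_utils instance are hypothetical:

import json

config_args = json.dumps({"collectors": ["10.0.0.1:8086"]})  # hypothetical values
logger = log_utils.initialize_sandesh_logger(config_args)    # log_utils: an existing JobLogUtils-like object
logger.info("Sandesh logger ready.")
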
Example #4
    def start_job(self):
        job_error_msg = None
        job_template = None
        try:
            # create job UVE and log
            self.result_handler = JobResultHandler(self.job_template_id,
                                                   self.job_execution_id,
                                                   self.fabric_fq_name,
                                                   self._logger,
                                                   self.job_utils,
                                                   self.job_log_utils)

            job_template = self.job_utils.read_job_template()
            self.job_template = job_template

            msg = MsgBundle.getMessage(
                MsgBundle.START_JOB_MESSAGE,
                job_execution_id=self.job_execution_id,
                job_template_name=job_template.fq_name[-1])
            self._logger.debug(msg)

            timestamp = int(round(time.time() * 1000))
            self.job_log_utils.send_job_log(job_template.fq_name,
                                            self.job_execution_id,
                                            self.fabric_fq_name,
                                            msg,
                                            JobStatus.STARTING.value,
                                            timestamp=timestamp)

            # validate job input if required by job_template input_schema
            input_schema = job_template.get_job_template_input_schema()
            if input_schema:
                self._validate_job_input(input_schema, self.job_data)

            playbook_list = job_template.get_job_template_playbooks()\
                .get_playbook_info()

            job_percent = None
            # calculate job percentage for each playbook
            if len(playbook_list) > 1:
                task_weightage_array = [
                    pb_info.job_completion_weightage
                    for pb_info in playbook_list]

            for i in range(0, len(playbook_list)):

                # check if it's a multi-device playbook
                playbooks = job_template.get_job_template_playbooks()
                play_info = playbooks.playbook_info[i]
                multi_device_playbook = play_info.multi_device_playbook

                if len(playbook_list) > 1:
                    # get the job percentage based on the weightage of each
                    # playbook when they are chained
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100, task_seq_number=i + 1,
                            task_weightage_array=task_weightage_array)[0]
                else:
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100)[0]  # using equal weightage

                retry_devices = None
                while True:
                    job_mgr = JobManager(self._logger, self._vnc_api,
                                         self.job_input, self.job_log_utils,
                                         job_template,
                                         self.result_handler, self.job_utils,
                                         i, job_percent, self._zk_client)
                    self.job_mgr = job_mgr
                    job_mgr.start_job()

                    # retry the playbook execution if retry_devices is added to
                    # the playbook output
                    job_status = self.result_handler.job_result_status
                    retry_devices = self.result_handler.get_retry_devices()
                    if job_status == JobStatus.FAILURE or not retry_devices \
                            or self.abort_flag:
                        break
                    self.job_input['device_json'] = retry_devices

                # update the job input with marked playbook output json
                pb_output = self.result_handler.playbook_output or {}

                if pb_output.get('early_exit'):
                    break

                # stop the workflow if playbook failed
                if self.result_handler.job_result_status == JobStatus.FAILURE:

                    # stop the workflow only if it is a single-device job, or
                    # if it is a multi-device playbook and all the devices
                    # have failed the job execution; in that case declare it
                    # a failure and stop the workflow

                    if not multi_device_playbook or \
                            (multi_device_playbook and
                             len(self.result_handler.failed_device_jobs) ==
                             len(self.job_input.get('device_json'))):
                        self._logger.error(
                            "Stop the workflow on the failed Playbook.")
                        break

                    elif not retry_devices:
                        # it is a multi-device playbook but only some of
                        # the device jobs have failed. We still declare the
                        # operation a success: the workflow succeeds as long
                        # as at least one device completed the job

                        self.result_handler.job_result_status =\
                            JobStatus.SUCCESS

                if self.abort_flag:
                    err_msg = "ABORTING NOW..."
                    self._logger.info(err_msg)
                    self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
                    break

                # update the job input with marked playbook output json
                pb_output = self.result_handler.playbook_output or {}

                # read the device_data output of the playbook
                # and update the job input so that it can be used in next
                # iteration
                if not multi_device_playbook:
                    device_json = pb_output.pop('device_json', None)
                    self.job_input['device_json'] = device_json

                self.job_input.get('input', {}).update(pb_output)

            # create job completion log and update job UVE
            self.result_handler.create_job_summary_log(
                job_template.fq_name)

            # in case of failures, exit the job manager process with failure
            if self.result_handler.job_result_status == JobStatus.FAILURE:
                job_error_msg = self.result_handler.job_summary_message

        except JobException as exp:
            err_msg = "Job Exception recieved: %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE,
                                                  err_msg)
            if job_template:
                self.result_handler.create_job_summary_log(
                    job_template.fq_name)
            job_error_msg = err_msg
        except Exception as exp:
            err_msg = "Error while executing job %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE,
                                                  err_msg)
            self.result_handler.create_job_summary_log(job_template.fq_name)
            job_error_msg = err_msg
        finally:
            # need to wait for the last job log and uve update to complete
            # via sandesh and then close sandesh connection
            sandesh_util = SandeshUtils(self._logger)
            sandesh_util.close_sandesh_connection()
            self._logger.info("Closed Sandesh connection")
            if job_error_msg is not None:
                sys.exit(job_error_msg)
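
The calculate_job_percentage helper is not shown in these examples; the call sites suggest it splits total_percent across chained playbooks by completion weightage (equal shares when no weights are given) and returns a tuple whose first element is the percent. A schematic sketch under those assumptions, not the actual implementation:

def calculate_job_percentage_sketch(num_tasks, total_percent=100,
                                    task_seq_number=None,
                                    task_weightage_array=None):
    # equal weightage when no per-task weights are supplied
    if not task_weightage_array:
        return total_percent / float(num_tasks)
    # weighted share for the 1-based task_seq_number
    weight = task_weightage_array[task_seq_number - 1]
    return total_percent * float(weight) / sum(task_weightage_array)
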
Example #5
    def start_job(self):
        self._logger.info("Starting Executable")
        job_error_msg = None
        job_template = self.job_template
        try:
            # create job UVE and log
            self.result_handler = JobResultHandler(self.job_template_id,
                                                   self.job_execution_id,
                                                   self.fabric_fq_name,
                                                   self._logger,
                                                   self.job_utils,
                                                   self.job_log_utils)


            msg = MsgBundle.getMessage(
                MsgBundle.START_JOB_MESSAGE,
                job_execution_id=self.job_execution_id,
                job_template_name=job_template.fq_name[-1])
            self._logger.debug(msg)

            timestamp = int(round(time.time() * 1000))
            self.job_log_utils.send_job_log(job_template.fq_name,
                                            self.job_execution_id,
                                            self.fabric_fq_name,
                                            msg,
                                            JobStatus.STARTING.value,
                                            timestamp=timestamp)

            # validate job input if required by job_template input_schema
            input_schema = job_template.get_job_template_input_schema()
            if input_schema:
                self._validate_job_input(input_schema, self.job_data)

            executable_list = job_template.get_job_template_executables()\
                .get_executable_info()
            for executable in executable_list:
                exec_path = executable.get_executable_path()
                exec_args = executable.get_executable_args()
                job_input_args = self.gather_job_args()
                try:
                    exec_process = subprocess32.Popen(
                        [exec_path,
                         "--job-input", json.dumps(job_input_args),
                         '--debug', 'True'],
                        close_fds=True, cwd='/',
                        stdout=subprocess32.PIPE,
                        stderr=subprocess32.PIPE)
                    self.job_file_write.write_to_file(
                        self.job_execution_id,
                        "job_summary",
                        JobFileWrite.JOB_LOG,
                        {"job_status": "INPROGRESS"})
                    msg = "Child process pid = " + str(exec_process.pid)
                    self._logger.info(msg)
                    (out, err) = exec_process.communicate(timeout=self.executable_timeout)

                    self._logger.notice(str(out))
                    self._logger.notice(str(err))
                except subprocess32.TimeoutExpired as timeout_exp:
                    if exec_process is not None:
                        # hard-kill the child process on timeout (SIGKILL)
                        os.kill(exec_process.pid, 9)
                        msg = MsgBundle.getMessage(
                            MsgBundle.RUN_EXECUTABLE_PROCESS_TIMEOUT,
                            exec_path=exec_path,
                            exc_msg=repr(timeout_exp))
                        raise JobException(msg, self.job_execution_id)

                self._logger.info(str(exec_process.returncode))
                self._logger.info("Executable Completed")
                if exec_process.returncode != 0:
                    self.job_file_write.write_to_file(
                        self.job_execution_id,
                        "job_summary",
                        JobFileWrite.JOB_LOG,
                        {"job_status": "FAILED"})
                    msg = MsgBundle.getMessage(
                        MsgBundle.EXECUTABLE_RETURN_WITH_ERROR,
                        exec_uri=exec_path)
                    self._logger.error(msg)
                else:
                    self.job_file_write.write_to_file(
                        self.job_execution_id,
                        "job_summary",
                        JobFileWrite.JOB_LOG,
                        {"job_status": "COMPLETED"})


        except JobException as exp:
            err_msg = "Job Exception recieved: %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE,
                                                  err_msg)
            if job_template:
                self.result_handler.create_job_summary_log(
                    job_template.fq_name)
            job_error_msg = err_msg
        except Exception as exp:
            err_msg = "Error while executing job %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE,
                                                  err_msg)
            self.result_handler.create_job_summary_log(job_template.fq_name)
            job_error_msg = err_msg
        finally:
            # need to wait for the last job log and uve update to complete
            # via sandesh and then close sandesh connection
            sandesh_util = SandeshUtils(self._logger)
            sandesh_util.close_sandesh_connection()
            self._logger.info("Closed Sandesh connection")
            if job_error_msg is not None:
                sys.exit(job_error_msg)
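
The timeout handling above can be restated as a self-contained sketch using the standard-library subprocess module (subprocess32 is its Python 2 backport); run_with_timeout is a hypothetical helper, not part of the original code:

import subprocess

def run_with_timeout(cmd, timeout_sec):
    # spawn the child and enforce a hard timeout, mirroring the
    # communicate()/TimeoutExpired handling in the example above
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, close_fds=True)
    try:
        out, err = proc.communicate(timeout=timeout_sec)
    except subprocess.TimeoutExpired:
        proc.kill()         # SIGKILL on POSIX, like os.kill(pid, 9) above
        proc.communicate()  # reap the killed child
        raise
    return proc.returncode, out, err
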
    def start_job(self):
        job_error_msg = None
        job_template = None
        try:
            # create job UVE and log
            msg = MsgBundle.getMessage(MsgBundle.START_JOB_MESSAGE,
                                       job_execution_id=self.job_execution_id)
            self._logger.debug(msg)

            self.result_handler = JobResultHandler(self.job_template_id,
                                                   self.job_execution_id,
                                                   self.fabric_fq_name,
                                                   self._logger,
                                                   self.job_utils,
                                                   self.job_log_utils)

            job_template = self.job_utils.read_job_template()

            timestamp = int(round(time.time() * 1000))
            self.job_log_utils.send_job_log(job_template.fq_name,
                                            self.job_execution_id,
                                            self.fabric_fq_name,
                                            msg,
                                            JobStatus.STARTING.value,
                                            timestamp=timestamp)

            # validate job input if required by job_template input_schema
            input_schema = job_template.get_job_template_input_schema()
            if input_schema:
                self._validate_job_input(input_schema, self.job_data)

            playbook_list = job_template.get_job_template_playbooks()\
                .get_playbook_info()

            job_percent = None
            # calculate job percentage for each playbook
            if len(playbook_list) > 1:
                task_weightage_array = [
                    pb_info.job_completion_weightage
                    for pb_info in playbook_list
                ]

            for i in range(0, len(playbook_list)):

                if len(playbook_list) > 1:
                    # get the job percentage based on weightage of each playbook
                    # when they are chained
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100, task_seq_number=i + 1,
                            task_weightage_array=task_weightage_array)[0]
                else:
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100)[0]  # using equal weightage

                job_mgr = JobManager(self._logger, self._vnc_api,
                                     self.job_input, self.job_log_utils,
                                     job_template, self.result_handler,
                                     self.job_utils, i, job_percent)

                job_mgr.start_job()

                # stop the workflow if playbook failed
                if self.result_handler.job_result_status == JobStatus.FAILURE:
                    self._logger.error(
                        "Stop the workflow on the failed Playbook.")
                    break

                # update the job input with marked playbook output json
                pb_output = self.result_handler.playbook_output or {}

                # read the device_data output of the playbook
                # and update the job input so that it can be used in next
                # iteration
                if not self.job_input.get('device_json'):
                    device_json = pb_output.get('device_json')
                    self.job_input['device_json'] = device_json

                if not self.job_input.get('prev_pb_output'):
                    self.job_input['prev_pb_output'] = pb_output
                else:
                    self.job_input['prev_pb_output'].update(pb_output)
                self.job_input.get('input', {}).update(pb_output)

            # create job completion log and update job UVE
            self.result_handler.create_job_summary_log(job_template.fq_name)

            # in case of failures, exit the job manager process with failure
            if self.result_handler.job_result_status == JobStatus.FAILURE:
                job_error_msg = self.result_handler.job_summary_message

        except JobException as exp:
            err_msg = "Job Exception recieved: %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
            if job_template:
                self.result_handler.create_job_summary_log(
                    job_template.fq_name)
            job_error_msg = err_msg
        except Exception as exp:
            err_msg = "Error while executing job %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
            self.result_handler.create_job_summary_log(job_template.fq_name)
            job_error_msg = err_msg
        finally:
            # need to wait for the last job log and uve update to complete
            # via sandesh and then close sandesh connection
            sandesh_util = SandeshUtils(self._logger)
            sandesh_util.close_sandesh_connection()
            self._logger.info("Closed Sandesh connection")
            if job_error_msg is not None:
                sys.exit(job_error_msg)
    def start_job(self):
        job_error_msg = None
        job_template = None
        try:
            # create job UVE and log
            self.result_handler = JobResultHandler(self.job_template_id,
                                                   self.job_execution_id,
                                                   self.fabric_fq_name,
                                                   self._logger,
                                                   self.job_utils,
                                                   self.job_log_utils)

            job_template = self.job_utils.read_job_template()

            msg = MsgBundle.getMessage(
                MsgBundle.START_JOB_MESSAGE,
                job_execution_id=self.job_execution_id,
                job_template_name=job_template.fq_name[-1])
            self._logger.debug(msg)

            timestamp = int(round(time.time() * 1000))
            self.job_log_utils.send_job_log(job_template.fq_name,
                                            self.job_execution_id,
                                            self.fabric_fq_name,
                                            msg,
                                            JobStatus.STARTING.value,
                                            timestamp=timestamp)

            # validate job input if required by job_template input_schema
            input_schema = job_template.get_job_template_input_schema()
            if input_schema:
                self._validate_job_input(input_schema, self.job_data)

            playbook_list = job_template.get_job_template_playbooks()\
                .get_playbook_info()

            job_percent = None
            # calculate job percentage for each playbook
            if len(playbook_list) > 1:
                task_weightage_array = [
                    pb_info.job_completion_weightage
                    for pb_info in playbook_list]

            for i in range(0, len(playbook_list)):

                if len(playbook_list) > 1:
                    # get the job percentage based on weightage of each playbook
                    # when they are chained
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100, task_seq_number=i + 1,
                            task_weightage_array=task_weightage_array)[0]
                else:
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100)[0]  # using equal weightage

                retry_devices = None
                while True:
                    job_mgr = JobManager(self._logger, self._vnc_api,
                                         self.job_input, self.job_log_utils,
                                         job_template,
                                         self.result_handler, self.job_utils, i,
                                         job_percent, self._zk_client,
                                         self.db_init_params,
                                         self.cluster_id)
                    job_mgr.start_job()

                    # retry the playbook execution if retry_devices is added to
                    # the playbook output
                    job_status = self.result_handler.job_result_status
                    retry_devices = self.result_handler.get_retry_devices()
                    if job_status == JobStatus.FAILURE or not retry_devices:
                        break
                    self.job_input['device_json'] = retry_devices

                # stop the workflow if playbook failed
                if self.result_handler.job_result_status == JobStatus.FAILURE:

                    # stop the workflow only if it is a single-device job, or
                    # if it is a multi-device playbook and all the devices
                    # have failed the job execution; in that case declare it
                    # a failure and stop the workflow

                    if self.job_input.get('device_json') is None or \
                            len(self.result_handler.failed_device_jobs) == \
                            len(self.job_input.get('device_json')):
                        self._logger.error(
                            "Stop the workflow on the failed Playbook.")
                        break

                    elif not retry_devices:
                        # it is a multi-device playbook but only some of the
                        # device jobs have failed. We still declare the
                        # operation a success: the workflow succeeds as long
                        # as at least one device completed the job

                        self.result_handler.job_result_status = JobStatus.SUCCESS

                # update the job input with marked playbook output json
                pb_output = self.result_handler.playbook_output or {}

                # read the device_data output of the playbook
                # and update the job input so that it can be used in next
                # iteration
                if not self.job_input.get('device_json'):
                    device_json = pb_output.pop('device_json', None)
                    self.job_input['device_json'] = device_json

                self.job_input.get('input', {}).update(pb_output)

            # create job completion log and update job UVE
            self.result_handler.create_job_summary_log(
                job_template.fq_name)

            # in case of failures, exit the job manager process with failure
            if self.result_handler.job_result_status == JobStatus.FAILURE:
                job_error_msg = self.result_handler.job_summary_message

        except JobException as exp:
            err_msg = "Job Exception recieved: %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE,
                                                  err_msg)
            if job_template:
                self.result_handler.create_job_summary_log(
                    job_template.fq_name)
            job_error_msg = err_msg
        except Exception as exp:
            err_msg = "Error while executing job %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE,
                                                  err_msg)
            self.result_handler.create_job_summary_log(job_template.fq_name)
            job_error_msg = err_msg
        finally:
            # need to wait for the last job log and uve update to complete
            # via sandesh and then close sandesh connection
            sandesh_util = SandeshUtils(self._logger)
            sandesh_util.close_sandesh_connection()
            self._logger.info("Closed Sandesh connection")
            if job_error_msg is not None:
                sys.exit(job_error_msg)
    def start_job(self):
        job_error_msg = None
        job_template = None
        try:
            # create job UVE and log
            job_template = self.job_utils.read_job_template()
            self.job_template = job_template
            self.job_description = self.job_template.display_name
            if not self.job_transaction_descr:
                self.job_transaction_descr = self._generate_transaction_descr()

            self.result_handler = JobResultHandler(
                self.job_template_id, self.job_execution_id,
                self.fabric_fq_name, self._logger, self.job_utils,
                self.job_log_utils, self.device_name, self.job_description,
                self.job_transaction_id, self.job_transaction_descr)

            msg = MsgBundle.getMessage(
                MsgBundle.START_JOB_MESSAGE,
                job_execution_id=self.job_execution_id,
                job_template_name=job_template.fq_name[-1])
            self._logger.debug(msg)

            timestamp = int(round(time.time() * 1000))
            self.job_log_utils.send_job_log(
                job_template.fq_name,
                self.job_execution_id,
                self.fabric_fq_name,
                msg,
                JobStatus.STARTING.value,
                timestamp=timestamp,
                device_name=self.device_name,
                description=self.job_description,
                transaction_id=self.job_transaction_id,
                transaction_descr=self.job_transaction_descr)

            # validate job input if required by job_template input_schema
            input_schema = job_template.get_job_template_input_schema()
            if input_schema:
                self._validate_job_input(input_schema, self.job_data)

            playbook_list = job_template.get_job_template_playbooks()\
                .get_playbook_info()

            job_percent = None
            # calculate job percentage for each playbook
            if len(playbook_list) > 1:
                task_weightage_array = [
                    pb_info.job_completion_weightage
                    for pb_info in playbook_list
                ]

            cleanup_in_progress = False
            cleanup_completed = False
            pb_idx = 0

            while pb_idx < len(playbook_list):

                # check if it's a multi-device playbook
                playbooks = job_template.get_job_template_playbooks()
                play_info = playbooks.playbook_info[pb_idx]
                multi_device_playbook = play_info.multi_device_playbook
                playbook_name = play_info.playbook_uri.split('/')[-1]

                if cleanup_in_progress:
                    # If we need to clean up due to a previous error, ignore
                    # any playbooks that don't perform recovery
                    if not play_info.recovery_playbook:
                        self._logger.info("Ignoring playbook %s since it "
                                          "does not perform recovery" %
                                          playbook_name)
                        pb_idx += 1
                        continue

                    # If we are running a recovery playbook, then
                    # cleanup_completed needs to be set irrespective of
                    # a success or error in recovery playbook execution
                    else:
                        self._logger.info("Running recovery playbook %s" %
                                          playbook_name)
                        cleanup_completed = True
                else:
                    # Don't run a recovery playbook if we haven't hit an error
                    if play_info.recovery_playbook:
                        self._logger.info(
                            "Ignoring recovery playbook %s since we "
                            "haven't hit an error" % playbook_name)
                        pb_idx += 1
                        continue

                if len(playbook_list) > 1:
                    # get the job percentage based on weightage of each playbook
                    # when they are chained
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100, task_seq_number=pb_idx + 1,
                            task_weightage_array=task_weightage_array)[0]
                else:
                    job_percent = \
                        self.job_log_utils.calculate_job_percentage(
                            len(playbook_list), buffer_task_percent=True,
                            total_percent=100)[0]  # using equal weightage

                retry_devices = None
                while True:
                    job_mgr = JobManager(self._logger, self._vnc_api,
                                         self.job_input, self.job_log_utils,
                                         job_template, self.result_handler,
                                         self.job_utils, pb_idx, job_percent,
                                         self._zk_client, self.job_description,
                                         self.job_transaction_id,
                                         self.job_transaction_descr)
                    self.job_mgr = job_mgr
                    job_mgr.start_job()

                    # retry the playbook execution if retry_devices is added to
                    # the playbook output
                    job_status = self.result_handler.job_result_status
                    retry_devices = self.result_handler.get_retry_devices()
                    failed_device_list = self.result_handler\
                        .get_failed_device_list()
                    if job_status == JobStatus.FAILURE or not retry_devices \
                            or self.abort_flag:
                        break
                    self.job_input['device_json'] = retry_devices
                    self.job_input['input']['failed_list'] = failed_device_list

                # update the job input with marked playbook output json
                pb_output = self.result_handler.playbook_output or {}

                if pb_output.get('early_exit'):
                    break

                # stop the workflow if playbook failed
                if self.result_handler.job_result_status == JobStatus.FAILURE:

                    # If it is a single device job or
                    # if it is a multi device playbook
                    # and all the devices have failed some job execution,
                    # declare it as failure, perform cleanup if possible
                    # and then stop the workflow

                    if not multi_device_playbook or \
                            (multi_device_playbook and
                             len(self.result_handler.failed_device_jobs) ==
                             len(self.job_input.get('device_json'))):
                        if not cleanup_in_progress:
                            cleanup_in_progress = True
                            pb_idx = 0
                            self._logger.info("Stop the workflow on the failed"
                                              " Playbook and start cleanup")
                        else:
                            pb_idx += 1
                        continue

                    elif not retry_devices:
                        # it is a multi-device playbook but only some of
                        # the device jobs have failed. We still declare the
                        # operation a success: the workflow succeeds as long
                        # as at least one device completed the job

                        self.result_handler.job_result_status =\
                            JobStatus.SUCCESS

                if self.abort_flag:
                    err_msg = "ABORTING NOW..."
                    self._logger.info(err_msg)
                    self.result_handler.update_job_status(
                        JobStatus.FAILURE, err_msg)
                    break

                # update the job input with marked playbook output json
                pb_output = self.result_handler.playbook_output or {}

                # read the device_data output of the playbook
                # and update the job input so that it can be used in next
                # iteration
                if not multi_device_playbook:
                    device_json = pb_output.pop('device_json', None)
                    self.job_input['device_json'] = device_json

                self.job_input.get('input', {}).update(pb_output)

                pb_idx += 1

            # A successful recovery playbook execution might
            # set JobStatus to success but this does not indicate a
            # success in the workflow. Set JobStatus to failure again.
            if cleanup_completed:
                err_msg = "Finished cleaning up after the error"
                self.result_handler.update_job_status(JobStatus.FAILURE,
                                                      err_msg)
                cleanup_completed = False
                cleanup_in_progress = False

            # create job completion log and update job UVE
            self.result_handler.create_job_summary_log(job_template.fq_name)

            # in case of failures, exit the job manager process with failure
            if self.result_handler.job_result_status == JobStatus.FAILURE:
                job_error_msg = self.result_handler.job_summary_message

        except JobException as exp:
            err_msg = "Job Exception recieved: %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
            if job_template:
                self.result_handler.create_job_summary_log(
                    job_template.fq_name)
            job_error_msg = err_msg
        except Exception as exp:
            err_msg = "Error while executing job %s " % repr(exp)
            self._logger.error(err_msg)
            self._logger.error("%s" % traceback.format_exc())
            self.result_handler.update_job_status(JobStatus.FAILURE, err_msg)
            self.result_handler.create_job_summary_log(job_template.fq_name)
            job_error_msg = err_msg
        finally:
            # need to wait for the last job log and uve update to complete
            # via sandesh and then close sandesh connection
            sandesh_util = SandeshUtils(self._logger)
            sandesh_util.close_sandesh_connection()
            self._logger.info("Closed Sandesh connection")
            if job_error_msg is not None:
                sys.exit(job_error_msg)
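
The `while True` retry blocks in the start_job variants above reduce to one control pattern; run_playbook and get_retry_devices below are hypothetical stand-ins for JobManager.start_job and the result-handler accessors:

def run_with_device_retries(run_playbook, get_retry_devices, job_input):
    # re-run the playbook while it keeps reporting devices to retry,
    # mirroring the `while True` blocks in the start_job variants above
    while True:
        status = run_playbook(job_input)
        retry_devices = get_retry_devices()
        if status == "FAILURE" or not retry_devices:
            return status
        job_input['device_json'] = retry_devices
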
    def close_sandesh_conn(self):
        try:
            sandesh_util = SandeshUtils(self.job_log_util.get_config_logger())
            sandesh_util.close_sandesh_connection()
        except Exception as e:
            logging.error("Unable to close sandesh connection: %s", str(e))