Example #1
    def job_stats(self):
        workflow = None
        try:
            workflow = stampede_statistics.StampedeStatistics(self.__get_wf_db_url(), False)
            workflow.initialize(root_wf_id=self._wf_id)
            workflow.set_job_filter("all")

            job_retry_count_dict = {}
            content = []

            for job in workflow.get_job_statistics():

                kickstart = "0" if job.kickstart == None else float(job.kickstart)
                multiplier_factor = "0" if job.multiplier_factor == None else int(job.multiplier_factor)
                kickstart_multi = "0" if job.kickstart_multi == None else float(job.kickstart_multi)
                remote_cpu_time = "0" if job.remote_cpu_time == None else float(job.remote_cpu_time)
                post_time = "0" if job.post_time == None else float(job.post_time)
                condor_q_time = "0" if job.condor_q_time == None else float(job.condor_q_time)
                resource_delay = "0" if job.resource_delay == None else float(job.resource_delay)
                runtime = "0" if job.runtime == None else float(job.runtime)
                seqexec = "-" if job.seqexec == None else float(job.seqexec)

                seqexec_delay = "-"
                if job.seqexec is not None and job.kickstart is not None:
                    seqexec_delay = float(job.seqexec) - float(job.kickstart)

                if job.job_name in job_retry_count_dict:
                    job_retry_count_dict[job.job_name] += 1
                else:
                    job_retry_count_dict[job.job_name] = 1

                retry_count = job_retry_count_dict[job.job_name]

                content.append(
                    [
                        job.job_name,
                        retry_count,
                        job.site,
                        kickstart,
                        multiplier_factor,
                        kickstart_multi,
                        remote_cpu_time,
                        post_time,
                        condor_q_time,
                        resource_delay,
                        runtime,
                        seqexec,
                        seqexec_delay,
                        utils.raw_to_regular(job.exit_code),
                        job.host_name,
                    ]
                )

            return content

        finally:
            if workflow is not None:
                Dashboard.close(workflow)
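
All of the examples on this page route a job's raw exit status through utils.raw_to_regular before displaying it. As a rough illustration of what such a decoder does (a minimal sketch assuming the raw value is a POSIX wait status; the actual Pegasus helper may differ), the standard library can take the status apart on Unix:

    import os

    def raw_to_regular_sketch(raw):
        """Hypothetical stand-in for utils.raw_to_regular (Unix-only).

        Returns the plain exit code for a normal exit, or the negative
        signal number if the process was killed by a signal.
        """
        if not isinstance(raw, int):
            return raw  # pass non-numeric codes through unchanged
        if os.WIFSIGNALED(raw):
            return -os.WTERMSIG(raw)
        if os.WIFEXITED(raw):
            return os.WEXITSTATUS(raw)
        return raw
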
Example #2
    def job_stats(self):
        workflow = None
        try:
            workflow = stampede_statistics.StampedeStatistics(
                self.__get_wf_db_url(), False)
            workflow.initialize(root_wf_id=self._wf_id)
            workflow.set_job_filter('all')

            job_retry_count_dict = {}
            content = []

            for job in workflow.get_job_statistics():

                kickstart = '0' if job.kickstart is None else float(job.kickstart)
                multiplier_factor = '0' if job.multiplier_factor is None else int(job.multiplier_factor)
                kickstart_multi = '0' if job.kickstart_multi is None else float(job.kickstart_multi)
                remote_cpu_time = '0' if job.remote_cpu_time is None else float(job.remote_cpu_time)
                post_time = '0' if job.post_time is None else float(job.post_time)
                condor_q_time = '0' if job.condor_q_time is None else float(job.condor_q_time)
                resource_delay = '0' if job.resource_delay is None else float(job.resource_delay)
                runtime = '0' if job.runtime is None else float(job.runtime)
                seqexec = '-' if job.seqexec is None else float(job.seqexec)

                seqexec_delay = '-'
                if job.seqexec is not None and job.kickstart is not None:
                    seqexec_delay = (float(job.seqexec) - float(job.kickstart))

                if job.job_name in job_retry_count_dict:
                    job_retry_count_dict[job.job_name] += 1
                else:
                    job_retry_count_dict[job.job_name] = 1

                retry_count = job_retry_count_dict[job.job_name]

                content.append([
                    job.job_name, retry_count, job.site, kickstart,
                    multiplier_factor, kickstart_multi, remote_cpu_time,
                    post_time, condor_q_time, resource_delay, runtime, seqexec,
                    seqexec_delay,
                    utils.raw_to_regular(job.exit_code), job.host_name
                ])

            return content

        finally:
            if workflow is not None:
                Dashboard.close(workflow)
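
job_stats() returns plain lists with a fixed column order. A hypothetical caller (dashboard stands in for an instance of the surrounding class) could label and print the rows like this:

    # Column names inferred from the fields appended in the loop above.
    columns = [
        "job_name", "try", "site", "kickstart", "multiplier_factor",
        "kickstart_multi", "remote_cpu_time", "post_time", "condor_q_time",
        "resource_delay", "runtime", "seqexec", "seqexec_delay",
        "exit_code", "host_name",
    ]

    for row in dashboard.job_stats():
        print(", ".join("%s=%s" % pair for pair in zip(columns, row)))
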
Example #3
    def createWorkflowEvent(self, record):
        ''' Map a result set into an event object. '''
        event = WorkflowEvent()
        event.name = record.exec_job_id
        event.is_dax = record.exec_job_id.startswith('subdax_')
        event.site = record.site
        event.timestamp = record.timestamp
        event.exitcode = utils.raw_to_regular(record.exitcode)
        event.hostname = record.hostname
        event.sched_id = record.sched_id
        # Normalize the working directory before storing it.
        event.work_dir = os.path.abspath(record.work_dir)
        event.work_dir = event.work_dir.replace("work/outputs", "work")
        event.dax_file = os.path.basename(record.dax_file)
        event.state = record.state
        event.stdout = record.stdout_file
        event.stderr = record.stderr_file
        
        #logger.debug (json.dumps (event, sort_keys=True, indent=3, cls=WorkflowEventEncoder))

        return event

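The commented-out debug line above references a WorkflowEventEncoder. One plausible definition (a sketch only; the real encoder is not shown in this example) serializes the event through its attribute dictionary:

    import json

    class WorkflowEventEncoder(json.JSONEncoder):
        """Hypothetical encoder for the json.dumps debug call above."""

        def default(self, obj):
            if isinstance(obj, WorkflowEvent):
                # Expose the event's attributes as a plain dict.
                return obj.__dict__
            return json.JSONEncoder.default(self, obj)
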
Example #4
    def process_invocation_notifications(self, wf, job, task_id, record=None):
        """
        This function takes care of processing invocation-level notifications.
        """
        if record is None:
            record = {}

        # Check if we have notifications for this workflow
        if wf._wf_uuid not in self._notifications:
            return

        # Get the notifications' dictionary for this workflow id
        wf_notifications = self._notifications[wf._wf_uuid]

        if "invocation" in wf_notifications:
            my_dict = wf_notifications["invocation"]
        else:
            logger.warning(
                "notification structure missing invocation entry...")
            return

        # Check if we have notifications for this job
        if job._exec_job_id not in my_dict:
            return

        # Advance to the task dictionary
        my_dict = my_dict[job._exec_job_id]

        # Check if we have notifications for this invocation
        if task_id not in my_dict:
            return

        my_notifications = my_dict[task_id]

        # Now, match the invocation state to the condition in the notification
        for k, my_actions in my_notifications.items():
            if "raw" in record:
                my_status = record["raw"]
            else:
                my_status = job._main_job_exitcode
            # Convert exitcode to int (leave it unchanged if that fails)
            try:
                my_status = int(my_status)
            except (ValueError, TypeError):
                pass
            # Now, compare to the notification condition(s)
            if my_status == 0 and k == "on_error":
                continue
            if my_status != 0 and k == "on_success":
                continue
            if k == "all":
                k = "at_end"

            # Here, we always use the rotated file names as the invocation has already finished...
            my_output = os.path.join(
                wf._original_submit_dir,
                job._output_file) + ".%03d" % (job._job_output_counter)
            my_error = os.path.join(
                wf._original_submit_dir,
                job._error_file) + ".%03d" % (job._job_output_counter)

            # Ok, we have a match!
            for action in my_actions:
                # Create dictionary with needed environment variables
                my_env = {}
                my_env["PEGASUS_EVENT"] = k
                my_env["PEGASUS_EVENT_TIMESTAMP"] = str(wf._current_timestamp)
                my_env["PEGASUS_EVENT_TIMESTAMP_ISO"] = utils.isodate(
                    wf._current_timestamp)
                my_env["PEGASUS_SUBMIT_DIR"] = wf._original_submit_dir
                my_env["PEGASUS_JOBID"] = job._exec_job_id
                my_env["PEGASUS_INVID"] = str(task_id)
                my_env["PEGASUS_WFID"] = ((wf._dax_label or "unknown") + "-" +
                                          (wf._dax_index or "unknown"))
                my_env["PEGASUS_STDOUT"] = my_output
                my_env["PEGASUS_STDERR"] = my_error
                if k != "start":
                    # Convert raw exitcode into human-parseable format
                    my_env["PEGASUS_STATUS"] = str(
                        utils.raw_to_regular(my_status))

                # Done, queue the notification
                self._pending_notifications.append((action, my_env))
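
The chain of lookups above implies a particular nesting for self._notifications. A representative value (illustrative only; the UUID, job id, and action paths are made up) looks like:

    # wf_uuid -> "invocation" -> exec_job_id -> task_id -> condition -> actions
    notifications = {
        "d3a2e1c0-0000-0000-0000-000000000000": {
            "invocation": {
                "ID0000001": {
                    1: {
                        "on_error": ["/bin/notify-failure.sh"],
                        "at_end": ["/bin/notify-done.sh"],
                    },
                },
            },
        },
    }
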
Example #5
    def process_invocation_notifications(self, wf, job, task_id, record=None):
        """
        This function takes care of processing invocation-level notifications.
        """
        if record is None:
            record = {}

        # Check if we have notifications for this workflow
        if wf._wf_uuid not in self._notifications:
            return

        # Get the notifications' dictionary for this workflow id
        wf_notifications = self._notifications[wf._wf_uuid]

        if "invocation" in wf_notifications:
            my_dict = wf_notifications["invocation"]
        else:
            logger.warning("notification structure missing invocation entry...")
            return

        # Check if we have notifications for this job
        if job._exec_job_id not in my_dict:
            return

        # Advance to the task dictionary
        my_dict = my_dict[job._exec_job_id]

        # Check if we have notifications for this invocation
        if task_id not in my_dict:
            return

        my_notifications = my_dict[task_id]

        # Now, match the invocation state to the condition in the notification
        for k, my_actions in my_notifications.items():
            if "raw" in record:
                my_status = record["raw"]
            else:
                my_status = job._main_job_exitcode
            # Convert exitcode to int (leave it unchanged if that fails)
            try:
                my_status = int(my_status)
            except (ValueError, TypeError):
                pass
            # Now, compare to the notification condition(s)
            if my_status == 0 and k == "on_error":
                continue
            if my_status != 0 and k == "on_success":
                continue
            if k == "all":
                k = "at_end"

            # Here, we always use the rotated file names as the invocation has already finished...
            my_output = os.path.join(wf._original_submit_dir, job._output_file) + ".%03d" % (job._job_output_counter)
            my_error = os.path.join(wf._original_submit_dir, job._error_file) + ".%03d" % (job._job_output_counter)

            # Ok, we have a match!
            for action in my_actions:
                # Create dictionary with needed environment variables
                my_env = {}
                my_env["PEGASUS_EVENT"] = k
                my_env["PEGASUS_EVENT_TIMESTAMP"] = str(wf._current_timestamp)
                my_env["PEGASUS_EVENT_TIMESTAMP_ISO"] = utils.isodate(wf._current_timestamp)
                my_env["PEGASUS_SUBMIT_DIR"] = wf._original_submit_dir
                my_env["PEGASUS_JOBID"] = job._exec_job_id
                my_env["PEGASUS_INVID"] = str(task_id)
                my_env["PEGASUS_WFID"] = ((wf._dax_label or "unknown") +
                                          "-" + (wf._dax_index or "unknown"))
                my_env["PEGASUS_STDOUT"] = my_output
                my_env["PEGASUS_STDERR"] = my_error
                if k != "start":
                    # Convert raw exitcode into human-parseable format
                    my_env["PEGASUS_STATUS"] = str(utils.raw_to_regular(my_status))

                # Done, queue the notification
                self._pending_notifications.append((action, my_env))
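
Note that the method only queues (action, my_env) pairs; nothing is executed here. A minimal sketch of how such a queue might later be drained (the function name is hypothetical, and the real dispatcher may invoke actions differently):

    import os
    import subprocess

    def run_pending_notifications(pending):
        """Hypothetical drain of the (action, my_env) queue built above."""
        for action, my_env in pending:
            env = dict(os.environ)
            env.update(my_env)  # expose the PEGASUS_* variables to the child
            subprocess.Popen(action, shell=True, env=env)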