def job_breakdown_stats(self):
    """Return per-transformation statistics for the current workflow.

    Returns a list of rows, one per transformation:
    [transformation, count, success, failure, min, max, avg, sum].
    """
    # Bind before the try block: if the StampedeStatistics constructor
    # raises, the finally clause would otherwise fail with
    # UnboundLocalError and mask the original exception.
    workflow = None
    try:
        workflow = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url(), True
        )
        workflow.initialize(root_wf_id=self._wf_id)

        content = []
        for t in workflow.get_transformation_statistics():
            content.append([
                t.transformation,
                int(t.count),
                int(t.success),
                int(t.failure),
                float(t.min),
                float(t.max),
                float(t.avg),
                float(t.sum),
            ])
        return content
    finally:
        # Dashboard.close is called with None when construction failed,
        # matching the convention used elsewhere in this class.
        Dashboard.close(workflow)
def plots_time_chart(self, wf_id, time_filter='hour'):
    """Return (jobs-by-time, invocations-by-time) data for time charts.

    :param wf_id: workflow identifier used to look up the workflow.
    :param time_filter: time bucket granularity passed to the
        statistics layer (default 'hour').
    """
    # Bind both handles before the try block so the finally clause
    # never references an unbound name if a constructor raises.
    workflow = None
    workflow_plots = None
    try:
        workflow = queries.WorkflowInfo(self.__get_wf_db_url(), wf_id)
        details = workflow.get_workflow_information()

        workflow_plots = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url()
        )
        workflow_plots.initialize(details.wf_uuid)
        workflow_plots.set_job_filter('nonsub')
        workflow_plots.set_time_filter(time_filter)
        workflow_plots.set_transformation_filter(
            exclude=['condor::dagman']
        )

        job = workflow_plots.get_jobs_run_by_time()
        invocation = workflow_plots.get_invocation_by_time()

        # Scale the bucket index by 3600 — presumably converting an
        # hour-of-epoch bucket into epoch seconds for the chart axis;
        # TODO confirm against the charting front-end.
        for j in job:
            j.date_format *= 3600
        for i in invocation:
            i.date_format *= 3600

        return job, invocation
    finally:
        Dashboard.close(workflow)
        Dashboard.close(workflow_plots)
def job_breakdown_stats(self):
    """Return per-transformation statistics rows for the current workflow.

    Each row contains the transformation name and type, counts, runtime
    aggregates formatted to two decimals, max-RSS aggregates converted
    from KB to MB, and average-CPU aggregates as percentages. Missing
    (falsy) memory/CPU values are rendered as '-'.
    """

    def _fmt(value):
        # Plain two-decimal formatting for always-present runtime fields.
        return "{:.2f}".format(value)

    def _fmt_mem(value):
        # maxrss appears to be in KB; shown divided by 1024 — '-' when absent.
        return "{:.2f}".format(value / 1024) if value else "-"

    def _fmt_cpu(value):
        # Fractional CPU utilisation rendered as a percentage — '-' when absent.
        return "{:.2f}%".format(value * 100) if value else "-"

    # Bind before the try block: if the constructor raises, the finally
    # clause would otherwise fail with UnboundLocalError.
    workflow = None
    try:
        workflow = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url(), True
        )
        workflow.initialize(root_wf_id=self._wf_id)

        content = []
        for t in workflow.get_transformation_statistics():
            content.append([
                t.transformation,
                t.type,
                int(t.count),
                _fmt(t.min),
                _fmt(t.max),
                _fmt(t.avg),
                _fmt(t.sum),
                _fmt_mem(t.min_maxrss),
                _fmt_mem(t.max_maxrss),
                _fmt_mem(t.avg_maxrss),
                _fmt_cpu(t.min_avg_cpu),
                _fmt_cpu(t.max_avg_cpu),
                _fmt_cpu(t.avg_avg_cpu),
            ])
        return content
    finally:
        Dashboard.close(workflow)
def integrity_stats(self):
    """Return integrity statistics for this workflow.

    Returns a dict with two entries: 'individual' (stats for this
    workflow only, expand_workflow=False) and 'all' (stats across the
    whole root workflow hierarchy).
    """
    # Bind both handles before the try block so the finally clause
    # never references an unbound name if either constructor raises.
    workflow = None
    workflow2 = None
    try:
        workflow = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url(), False
        )
        workflow.initialize(root_wf_id=self._wf_id)
        individual_stats = self._integrity_stats(workflow)

        workflow2 = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url()
        )
        workflow2.initialize(self._root_wf_uuid)
        all_stats = self._integrity_stats(workflow2)

        return {"individual": individual_stats, "all": all_stats}
    finally:
        Dashboard.close(workflow)
        Dashboard.close(workflow2)
def workflow_summary_stats(self, wf_id=None, wf_uuid=None):
    """Return summary timing statistics plus retry count for this workflow.

    :param wf_id: unused here — the method always operates on
        ``self._wf_id``; kept for interface compatibility with callers.
    :param wf_uuid: unused, kept for interface compatibility.
    """
    # Bind before the try block: if the constructor raises, the finally
    # clause would otherwise fail with UnboundLocalError.
    workflow = None
    try:
        workflow = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url(), expand_workflow=False
        )
        workflow.initialize(root_wf_id=self._wf_id)

        dictionary = self._get_workflow_summary_times(workflow)
        dictionary["retry-count"] = self._get_workflow_retries(workflow)
        return dictionary
    finally:
        Dashboard.close(workflow)
def plots_gantt_chart(self):
    """Return job-state data used to render the workflow Gantt chart."""
    # Bind before the try block: if the constructor raises, the finally
    # clause would otherwise fail with UnboundLocalError.
    workflow = None
    try:
        # Expand has to be set to False: get_job_states() does not
        # provide information when expand is set to True.
        workflow = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url(), False
        )
        workflow.initialize(root_wf_id=self._wf_id)
        return workflow.get_job_states()
    finally:
        Dashboard.close(workflow)
def job_stats(self):
    """Return one statistics row per job attempt for the current workflow.

    Each row: [job_name, retry_count, site, kickstart, multiplier_factor,
    kickstart_multi, remote_cpu_time, post_time, condor_q_time,
    resource_delay, runtime, seqexec, seqexec_delay, exit_code, host_name].
    NULL timing values are rendered as the string '0' ('-' for seqexec),
    preserving the historical output format.
    """

    def _float_or_zero(value):
        # The historical API returns the *string* '0' (not 0.0) for
        # missing values; keep that for backward compatibility.
        return '0' if value is None else float(value)

    # Bind before the try block: if the constructor raises, the finally
    # clause would otherwise fail with UnboundLocalError.
    workflow = None
    try:
        workflow = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url(), False
        )
        workflow.initialize(root_wf_id=self._wf_id)
        workflow.set_job_filter('all')

        job_retry_count_dict = {}
        content = []

        for job in workflow.get_job_statistics():
            kickstart = _float_or_zero(job.kickstart)
            multiplier_factor = (
                '0' if job.multiplier_factor is None
                else int(job.multiplier_factor)
            )
            kickstart_multi = _float_or_zero(job.kickstart_multi)
            remote_cpu_time = _float_or_zero(job.remote_cpu_time)
            post_time = _float_or_zero(job.post_time)
            condor_q_time = _float_or_zero(job.condor_q_time)
            resource_delay = _float_or_zero(job.resource_delay)
            runtime = _float_or_zero(job.runtime)
            seqexec = '-' if job.seqexec is None else float(job.seqexec)

            seqexec_delay = '-'
            if job.seqexec is not None and job.kickstart is not None:
                seqexec_delay = float(job.seqexec) - float(job.kickstart)

            # dict.has_key() was removed in Python 3; dict.get() gives
            # the same increment-or-start-at-one behavior.
            retry_count = job_retry_count_dict.get(job.job_name, 0) + 1
            job_retry_count_dict[job.job_name] = retry_count

            content.append([
                job.job_name, retry_count, job.site, kickstart,
                multiplier_factor, kickstart_multi, remote_cpu_time,
                post_time, condor_q_time, resource_delay, runtime,
                seqexec, seqexec_delay,
                utils.raw_to_regular(job.exit_code), job.host_name
            ])
        return content
    finally:
        Dashboard.close(workflow)
def plots_transformation_statistics(self, wf_id):
    """Return transformation statistics used by the plotting views.

    :param wf_id: workflow identifier used to look up the workflow.
    """
    # Bind both handles before the try block so the finally clause
    # never references an unbound name if a constructor raises.
    workflow = None
    workflow_plots = None
    try:
        workflow = queries.WorkflowInfo(self.__get_wf_db_url(), wf_id)
        details = workflow.get_workflow_information()

        workflow_plots = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url()
        )
        workflow_plots.initialize(details.wf_uuid)
        workflow_plots.set_job_filter("nonsub")
        workflow_plots.set_time_filter("hour")
        workflow_plots.set_transformation_filter(exclude=["condor::dagman"])

        return workflow_plots.get_transformation_statistics()
    finally:
        Dashboard.close(workflow)
        Dashboard.close(workflow_plots)
def get_workflow_information(self, wf_id=None, wf_uuid=None):
    """
    Get workflow specific information. This is when user click on a
    workflow link. Returns (job_counts, details, statistics).

    :param wf_id: workflow database id; either this or wf_uuid is required.
    :param wf_uuid: workflow UUID; either this or wf_id is required.
    :raises ValueError: if neither wf_id nor wf_uuid is given.
    """
    # Bind before the try block. In the original code these bindings
    # came AFTER the ValueError raise inside the try, so the
    # missing-arguments path hit UnboundLocalError in the finally
    # clause instead of surfacing the intended ValueError.
    workflow = None
    workflow_statistics = None
    try:
        if not wf_id and not wf_uuid:
            raise ValueError('Workflow ID or Workflow UUID is required')

        workflow = queries.WorkflowInfo(
            self.__get_wf_db_url(), wf_id=wf_id, wf_uuid=wf_uuid
        )
        details = self._get_workflow_details(workflow)
        job_counts = self._get_workflow_job_counts(workflow)

        workflow_statistics = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url(), expand_workflow=True
        )
        workflow_statistics.initialize(details.wf_uuid)

        statistics = {}
        statistics.update(
            self._get_workflow_summary_times(workflow_statistics)
        )
        statistics.update(
            self._get_workflow_summary_counts(workflow_statistics)
        )

        return job_counts, details, statistics
    finally:
        Dashboard.close(workflow)
        Dashboard.close(workflow_statistics)
def plots_time_chart(self, wf_id, time_filter="hour"):
    """Return (jobs-by-time, invocations-by-time) data for time charts.

    :param wf_id: workflow identifier used to look up the workflow.
    :param time_filter: time bucket granularity passed to the
        statistics layer (default "hour").
    """
    # Bind both handles before the try block so the finally clause
    # never references an unbound name if a constructor raises.
    workflow = None
    workflow_plots = None
    try:
        workflow = queries.WorkflowInfo(self.__get_wf_db_url(), wf_id)
        details = workflow.get_workflow_information()

        workflow_plots = stampede_statistics.StampedeStatistics(
            self.__get_wf_db_url()
        )
        workflow_plots.initialize(details.wf_uuid)
        workflow_plots.set_job_filter("nonsub")
        workflow_plots.set_time_filter(time_filter)
        workflow_plots.set_transformation_filter(exclude=["condor::dagman"])

        job = workflow_plots.get_jobs_run_by_time()
        invocation = workflow_plots.get_invocation_by_time()
        return job, invocation
    finally:
        Dashboard.close(workflow)
        Dashboard.close(workflow_plots)