class DashboardLocal(LocalEventHandler):
	"""Local event handler that publishes job lifecycle events to the CMS dashboard."""
	alias_list = ['dashboard']
	config_section_list = LocalEventHandler.config_section_list + ['dashboard']

	def __init__(self, config, name, task):
		"""Read dashboard options from *config* and prepare the notification thread pool."""
		LocalEventHandler.__init__(self, config, name, task)
		# Fallback application name reported to the dashboard
		self._app = config.get('application', 'shellscript', on_change=None)
		# Bounded wait for outstanding notifications at workflow shutdown
		self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
		self._tasktype = config.get('task', 'analysis', on_change=None)
		# Task id template; variables are substituted per job in _start_publish
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
		# Job state -> dashboard status string (anything else maps to 'PENDING')
		self._map_status_job2dashboard = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		# Notifications are sent asynchronously in daemon threads
		self._tp = GCThreadPool()

	def on_job_output(self, wms, job_obj, jobnum, exit_code):
		# Called when job output is retrieved; reports the executable exit code.
		# NOTE(review): job_obj is passed both as the job and as the 'data'
		# mapping read via data.get(...) in _update_dashboard -- assumes Job
		# exposes a dict-like get(); confirm against the Job class.
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {'ExeExitCode': exit_code})

	def on_job_submit(self, wms, job_obj, jobnum):
		# Called on job submission
		token = wms.get_access_token(job_obj.gc_id)
		job_config_dict = self._task.get_job_dict(jobnum)
		# Publish static submission metadata (user, tool, scheduler, dataset, ...)
		self._start_publish(job_obj, jobnum, 'submission', [{'user': get_local_username(),
			'GridName': '/CN=%s' % token.get_user_name(), 'CMSUser': token.get_user_name(),
			'tool': 'grid-control', 'JSToolVersion': get_version(),
			'SubmissionType': 'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': job_config_dict.get('SCRAM_PROJECTVERSION', self._app),
			'exe': job_config_dict.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.get_object_name(), 'vo': token.get_group(),
			'nevtJob': job_config_dict.get('MAX_EVENTS', 0),
			'datasetFull': job_config_dict.get('DATASETPATH', 'none')}])

	def on_job_update(self, wms, job_obj, jobnum, data):
		# Called on job status update; the 'data' argument is ignored here and
		# job_obj is forwarded as the data source instead.
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {})

	def on_workflow_finish(self):
		# Bounded wait so pending notification threads can finish before exit
		self._tp.wait_and_drop(self._dashboard_timeout)

	def _publish(self, job_obj, jobnum, task_id, usermsg):
		"""Send one dashboard message (runs inside a worker thread)."""
		# gc_id has the form '<prefix>.<backend>.<wms id>'
		(_, backend, wms_id) = job_obj.gc_id.split('.', 2)
		dash_id = '%s_%s' % (jobnum, wms_id)
		if 'http' not in job_obj.gc_id:
			# Synthesize an https-style id when the backend id is not a URL
			dash_id = '%s_https://%s:/%s' % (jobnum, backend, wms_id)
		# Merge identification keys with the user message parts; filter_dict with
		# value_filter=identity drops falsy/None-valued entries before publishing
		msg = dict_union({'taskId': task_id, 'jobId': dash_id, 'sid': wms_id}, *usermsg)
		DashboardAPI(task_id, dash_id).publish(**filter_dict(msg, value_filter=identity))

	def _start_publish(self, job_obj, jobnum, desc, msg):
		"""Resolve the per-job dashboard task id and publish *msg* in a daemon thread."""
		task_id = self._task.substitute_variables('dashboard task id', self._taskname, jobnum,
			additional_var_dict={'DATASETNICK': ''}).strip('_')
		self._tp.start_daemon('Notifying dashboard about %s of job %d' % (desc, jobnum),
			self._publish, job_obj, jobnum, task_id, msg)

	def _update_dashboard(self, wms, job_obj, jobnum, data, add_dict):
		# Called on job status update and output
		# Translate status into dashboard status message
		status_dashboard = self._map_status_job2dashboard.get(job_obj.state, 'PENDING')
		self._start_publish(job_obj, jobnum, 'status', [{'StatusValue': status_dashboard,
			'StatusValueReason': data.get('reason', status_dashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': job_obj.get_job_location()}, add_dict])
# --- Example #2 (score: 0) --- scraped snippet separator
	def __init__(self, config, name):
		"""Read the script hook settings and prepare the background thread pool."""
		LocalEventHandler.__init__(self, config, name)
		# Suppress logging of hook output when set
		self._silent = config.get_bool('silent', True, on_change=None)
		# One configurable hook command per job lifecycle event
		for event_name in ('submit', 'status', 'output', 'finish'):
			setattr(self, '_script_' + event_name,
				config.get_command('on ' + event_name, '', on_change=None))
		# Bounded wait applied to hook scripts
		self._script_timeout = config.get_time('script timeout', 20, on_change=None)
		self._path_work = config.get_work_path()
		self._tp = GCThreadPool()
 def __init__(self, config, name, task):
     """Legacy (camelCase config API) variant: read script hook options."""
     Monitoring.__init__(self, config, name, task)
     # Suppress logging of hook output when set
     self._silent = config.getBool('silent', True, onChange=None)
     # One configurable hook command per job lifecycle event
     self._evtSubmit = config.getCommand('on submit', '', onChange=None)
     self._evtStatus = config.getCommand('on status', '', onChange=None)
     self._evtOutput = config.getCommand('on output', '', onChange=None)
     self._evtFinish = config.getCommand('on finish', '', onChange=None)
     # Maximum runtime granted to hook scripts
     self._runningMax = config.getTime('script runtime', 5, onChange=None)
     self._workPath = config.getWorkPath()
     self._tp = GCThreadPool()
	def __init__(self, config, name, task):
		"""Read dashboard options and prepare the notification thread pool."""
		LocalEventHandler.__init__(self, config, name, task)
		# Fallback application name reported to the dashboard
		self._app = config.get('application', 'shellscript', on_change=None)
		# Bounded wait for pending notifications at shutdown
		self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
		self._tasktype = config.get('task', 'analysis', on_change=None)
		# Task id template; variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
		# Job state -> dashboard status string
		self._map_status_job2dashboard = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()
	def __init__(self, config, name, task):
		"""Legacy (camelCase) dashboard init: read options and prepare a thread pool."""
		Monitoring.__init__(self, config, name, task)
		jobDesc = task.getDescription(None) # TODO: use the other variables for monitoring
		# Fallback application name reported to the dashboard
		self._app = config.get('application', 'shellscript', onChange = None)
		# Default task type comes from the task description when available
		self._tasktype = config.get('task', jobDesc.jobType or 'analysis', onChange = None)
		# Task id template; variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', onChange = None)
		# Job state -> dashboard status string
		self._statusMap = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()
	def __init__(self, config, name, task):
		"""Read the script hook settings and create the background thread pool."""
		Monitoring.__init__(self, config, name, task)
		# Suppress logging of hook output when set
		self._silent = config.getBool('silent', True, onChange = None)
		# One configurable hook command per job lifecycle event
		for (attr_name, option_name) in [('_evtSubmit', 'on submit'), ('_evtStatus', 'on status'),
				('_evtOutput', 'on output'), ('_evtFinish', 'on finish')]:
			setattr(self, attr_name, config.getCommand(option_name, '', onChange = None))
		# Maximum runtime granted to hook scripts
		self._runningMax = config.getTime('script runtime', 5, onChange = None)
		self._workPath = config.getWorkPath()
		self._tp = GCThreadPool()
 def __init__(self, config, name, task):
     """Read dashboard options and prepare the notification thread pool."""
     LocalEventHandler.__init__(self, config, name, task)
     # Fallback application name reported to the dashboard
     self._app = config.get('application', 'shellscript', on_change=None)
     # Bounded wait for pending notifications at shutdown
     self._dashboard_timeout = config.get_time('dashboard timeout',
                                               5,
                                               on_change=None)
     self._tasktype = config.get('task', 'analysis', on_change=None)
     # Task id template; variables are substituted per job
     self._taskname = config.get('task name',
                                 '@GC_TASK_ID@_@DATASETNICK@',
                                 on_change=None)
     # Job state -> dashboard status string
     self._map_status_job2dashboard = {
         Job.DONE: 'DONE',
         Job.FAILED: 'DONE',
         Job.SUCCESS: 'DONE',
         Job.RUNNING: 'RUNNING',
         Job.ABORTED: 'ABORTED',
         Job.CANCELLED: 'CANCELLED'
     }
     self._tp = GCThreadPool()
def process_all(opts, args):
	"""Process the output of all selected jobs.

	opts  -- parsed command-line options (job_selector, token, output, shuffle,
	         threads, hide_results)
	args  -- args[0] names the work/config source handed to get_script_object
	Returns True when all processed jobs are in a finished state.
	"""
	# Init everything in each loop to pick up changes
	script_obj = get_script_object(args[0], opts.job_selector, only_success=False)
	token = AccessToken.create_instance(opts.token, script_obj.new_config, 'token')
	work_dn = script_obj.config.get_work_path()
	# Install the error archive log handler only once across repeated calls
	# (process_all.first is a function attribute -- assumed initialized by the caller)
	if process_all.first:
		logging.getLogger().addHandler(ProcessArchiveHandler(os.path.join(work_dn, 'error.tar')))
		process_all.first = False

	# Create SE output dir
	if not opts.output:
		opts.output = os.path.join(work_dn, 'se_output')
	if '://' not in opts.output:
		# Normalize a plain path into a file:// URL
		opts.output = 'file:///%s' % os.path.abspath(opts.output)

	job_db = script_obj.job_db
	jobnum_list = job_db.get_job_list()
	status_mon = StatusMonitor(len(jobnum_list))
	# Randomize processing order on request, otherwise process in job order
	if opts.shuffle:
		random.shuffle(jobnum_list)
	else:
		jobnum_list.sort()

	if opts.threads:
		# Parallel processing in daemon threads
		activity = Activity('Processing jobs')
		pool = GCThreadPool(opts.threads)
		for jobnum in jobnum_list:
			pool.start_daemon('Processing job %d' % jobnum, process_job,
				opts, work_dn, status_mon, job_db, token, jobnum)
		pool.wait_and_drop()
		activity.finish()
	elif jobnum_list:  # guard: max() would raise ValueError on an empty job list
		# Sequential processing with a progress indicator
		progress = ProgressActivity('Processing job', max(jobnum_list) + 1)
		for jobnum in jobnum_list:
			progress.update_progress(jobnum)
			process_job(opts, work_dn, status_mon, job_db, token, jobnum)
		progress.finish()

	# Print overview
	if not opts.hide_results:
		status_mon.show_results()
	return status_mon.is_finished()
 def __init__(self, config, name, task):
     """Legacy (camelCase) dashboard init: read options and prepare a thread pool."""
     Monitoring.__init__(self, config, name, task)
     jobDesc = task.getDescription(
         None)  # TODO: use the other variables for monitoring
     # Fallback application name reported to the dashboard
     self._app = config.get('application', 'shellscript', onChange=None)
     # Default task type comes from the task description when available
     self._tasktype = config.get('task',
                                 jobDesc.jobType or 'analysis',
                                 onChange=None)
     # Task id template; variables are substituted per job
     self._taskname = config.get('task name',
                                 '@GC_TASK_ID@_@DATASETNICK@',
                                 onChange=None)
     # Job state -> dashboard status string
     self._statusMap = {
         Job.DONE: 'DONE',
         Job.FAILED: 'DONE',
         Job.SUCCESS: 'DONE',
         Job.RUNNING: 'RUNNING',
         Job.ABORTED: 'ABORTED',
         Job.CANCELLED: 'CANCELLED'
     }
     self._tp = GCThreadPool()
def process_all(opts, args):
    """Process the output of all selected jobs; return True when all are finished."""
    # Init everything in each loop to pick up changes
    script_obj = get_script_object(args[0],
                                   opts.job_selector,
                                   only_success=False)
    token = AccessToken.create_instance(opts.token, script_obj.new_config,
                                        'token')
    work_dn = script_obj.config.get_work_path()
    # Install the error archive log handler only once across repeated calls
    # (process_all.first is a function attribute -- assumed set by the caller)
    if process_all.first:
        logging.getLogger().addHandler(
            ProcessArchiveHandler(os.path.join(work_dn, 'error.tar')))
        process_all.first = False

    # Create SE output dir
    if not opts.output:
        opts.output = os.path.join(work_dn, 'se_output')
    if '://' not in opts.output:
        # Normalize a plain path into a file:// URL
        opts.output = 'file:///%s' % os.path.abspath(opts.output)

    job_db = script_obj.job_db
    jobnum_list = job_db.get_job_list()
    status_mon = StatusMonitor(len(jobnum_list))
    # Randomize processing order on request, otherwise process in job order
    if opts.shuffle:
        random.shuffle(jobnum_list)
    else:
        jobnum_list.sort()

    if opts.threads:
        # Parallel processing in daemon threads
        activity = Activity('Processing jobs')
        pool = GCThreadPool(opts.threads)
        for jobnum in jobnum_list:
            pool.start_daemon('Processing job %d' % jobnum, process_job, opts,
                              work_dn, status_mon, job_db, token, jobnum)
        pool.wait_and_drop()
        activity.finish()
    else:
        # Sequential processing with a progress indicator.
        # NOTE(review): max() raises ValueError if jobnum_list is empty.
        progress = ProgressActivity('Processing job', max(jobnum_list) + 1)
        for jobnum in jobnum_list:
            progress.update_progress(jobnum)
            process_job(opts, work_dn, status_mon, job_db, token, jobnum)
        progress.finish()

    # Print overview
    if not opts.hide_results:
        status_mon.show_results()
    return status_mon.is_finished()
# --- Example #11 (score: 0) --- scraped snippet separator
class ScriptEventHandler(LocalEventHandler):
	"""Runs user-configured shell commands on job lifecycle events."""
	alias_list = ['scripts']
	config_section_list = LocalEventHandler.config_section_list + ['scripts']

	def __init__(self, config, name):
		"""Read the script hook settings and prepare the background thread pool."""
		LocalEventHandler.__init__(self, config, name)
		# silent=True runs scripts via os.system; False captures and logs output
		self._silent = config.get_bool('silent', True, on_change=None)
		# One configurable hook command per lifecycle event ('' disables the hook)
		self._script_submit = config.get_command('on submit', '', on_change=None)
		self._script_status = config.get_command('on status', '', on_change=None)
		self._script_output = config.get_command('on output', '', on_change=None)
		self._script_finish = config.get_command('on finish', '', on_change=None)
		# Bounded wait: per-script output timeout and final drain at shutdown
		self._script_timeout = config.get_time('script timeout', 20, on_change=None)
		self._path_work = config.get_work_path()
		# Scripts run asynchronously in daemon threads
		self._tp = GCThreadPool()

	def on_job_output(self, task, wms, job_obj, jobnum, exit_code):
		# Called on job status update
		self._run_in_background(self._script_output, task, jobnum, job_obj, {'RETCODE': exit_code})

	def on_job_submit(self, task, wms, job_obj, jobnum):
		# Called on job submission
		self._run_in_background(self._script_submit, task, jobnum, job_obj)

	def on_job_update(self, task, wms, job_obj, jobnum, data):
		# Called on job status update
		self._run_in_background(self._script_status, task, jobnum, job_obj)

	def on_task_finish(self, task, job_len):
		# Called at the end of the task
		self._run_in_background(self._script_finish, task,
			jobnum=0, additional_var_dict={'NJOBS': job_len})

	def on_workflow_finish(self):
		# Bounded wait for still-running hook scripts before shutdown
		self._tp.wait_and_drop(self._script_timeout)

	def _run_in_background(self, script, task, jobnum=None, job_obj=None, additional_var_dict=None):
		"""Start *script* in a daemon thread; an empty string disables the hook."""
		if script != '':
			self._tp.start_daemon('Running event handler script %s' % script,
				self._script_thread, script, task, jobnum, job_obj, additional_var_dict)

	def _script_thread(self, script, task, jobnum=None, job_obj=None, add_dict=None):
		# Get both task and job config / state dicts
		try:
			tmp = {}
			if job_obj is not None:
				# Job state entries are uppercased for the environment
				for key, value in job_obj.get_dict().items():
					tmp[key.upper()] = value
			tmp['GC_WORKDIR'] = self._path_work
			if jobnum is not None:
				tmp.update(task.get_job_dict(jobnum))
			tmp.update(add_dict or {})
			# Export everything with a GC_ prefix into a *copy* of the environment
			env = dict(os.environ)
			for key, value in tmp.items():
				if not key.startswith('GC_'):
					key = 'GC_' + key
				env[key] = str(value)

			script = task.substitute_variables('monitoring script', script, jobnum, tmp)
			if not self._silent:
				# Capture the script output and forward it to the logger
				proc = LocalProcess(*shlex.split(script), **{'env_dict': env})
				proc_output = proc.get_output(timeout=self._script_timeout)
				if proc_output.strip():
					self._log.info(proc_output.strip())
			else:
				# NOTE(review): the silent path runs via os.system, which does not
				# receive the GC_* env copy built above and writes directly to the
				# terminal -- confirm this asymmetry is intended.
				os.system(script)
		except Exception:
			self._log.exception('Error while running user script')
			clear_current_exception()
class ScriptMonitoring(Monitoring):
	"""Legacy (camelCase) monitoring plugin that runs user scripts on job events."""
	alias = ['scripts']
	configSections = EventHandler.configSections + ['scripts']

	def __init__(self, config, name, task):
		"""Read the script hook settings and prepare the background thread pool."""
		Monitoring.__init__(self, config, name, task)
		# silent=True runs scripts via os.system; False captures and logs output
		self._silent = config.getBool('silent', True, onChange = None)
		# One configurable hook command per lifecycle event ('' disables the hook)
		self._evtSubmit = config.getCommand('on submit', '', onChange = None)
		self._evtStatus = config.getCommand('on status', '', onChange = None)
		self._evtOutput = config.getCommand('on output', '', onChange = None)
		self._evtFinish = config.getCommand('on finish', '', onChange = None)
		# Maximum script runtime; also the final drain timeout in onTaskFinish
		self._runningMax = config.getTime('script runtime', 5, onChange = None)
		self._workPath = config.getWorkPath()
		self._tp = GCThreadPool()

	# Get both task and job config / state dicts
	def _scriptThread(self, script, jobNum = None, jobObj = None, allDict = None):
		try:
			tmp = {}
			if jobNum is not None:
				tmp.update(self._task.getSubmitInfo(jobNum))
			if jobObj is not None:
				tmp.update(jobObj.getAll())
			tmp['WORKDIR'] = self._workPath
			tmp.update(self._task.getTaskConfig())
			if jobNum is not None:
				tmp.update(self._task.getJobConfig(jobNum))
				# NOTE(review): getSubmitInfo is merged a second time here --
				# looks redundant with the first update above; confirm.
				tmp.update(self._task.getSubmitInfo(jobNum))
			tmp.update(allDict or {})
			# Export everything with a GC_ prefix -- mutates the live process
			# environment (unlike the newer variant, which uses a copy)
			for key, value in tmp.items():
				if not key.startswith('GC_'):
					key = 'GC_' + key
				os.environ[key] = str(value)

			script = self._task.substVars(script, jobNum, tmp)
			if not self._silent:
				# Capture the script output and forward it to the logger
				proc = LocalProcess(script)
				self._log.info(proc.get_output(timeout = self._runningMax))
			else:
				os.system(script)
		except Exception:
			self._log.exception('Error while running user script!')

	def _runInBackground(self, script, jobNum = None, jobObj = None, addDict = None):
		# Start the hook in a background thread; '' disables the hook
		if script != '':
			self._tp.start_thread('Running monitoring script %s' % script,
				self._scriptThread, script, jobNum, jobObj, addDict)

	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		self._runInBackground(self._evtSubmit, jobNum, jobObj)

	# Called on job status update
	def onJobUpdate(self, wms, jobObj, jobNum, data):
		self._runInBackground(self._evtStatus, jobNum, jobObj, {'STATUS': Job.enum2str(jobObj.state)})

	# Called on job status update
	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		self._runInBackground(self._evtOutput, jobNum, jobObj, {'RETCODE': retCode})

	# Called at the end of the task
	def onTaskFinish(self, nJobs):
		self._runInBackground(self._evtFinish, addDict = {'NJOBS': nJobs})
		# Bounded wait for still-running hook scripts
		self._tp.wait_and_drop(self._runningMax)
class DashBoard(Monitoring):
    """Legacy (camelCase) monitoring plugin publishing job events to the CMS dashboard."""
    configSections = Monitoring.configSections + ['dashboard']

    def __init__(self, config, name, task):
        """Read dashboard options and prepare the notification thread pool."""
        Monitoring.__init__(self, config, name, task)
        jobDesc = task.getDescription(
            None)  # TODO: use the other variables for monitoring
        # Fallback application name reported to the dashboard
        self._app = config.get('application', 'shellscript', onChange=None)
        # Bounded wait for pending notifications at shutdown
        self._runningMax = config.getTime('dashboard timeout',
                                          5,
                                          onChange=None)
        # Default task type comes from the task description when available
        self._tasktype = config.get('task',
                                    jobDesc.jobType or 'analysis',
                                    onChange=None)
        # Task id template; variables are substituted per job
        self._taskname = config.get('task name',
                                    '@GC_TASK_ID@_@DATASETNICK@',
                                    onChange=None)
        # Job state -> dashboard status string (anything else -> 'PENDING')
        self._statusMap = {
            Job.DONE: 'DONE',
            Job.FAILED: 'DONE',
            Job.SUCCESS: 'DONE',
            Job.RUNNING: 'RUNNING',
            Job.ABORTED: 'ABORTED',
            Job.CANCELLED: 'CANCELLED'
        }
        self._tp = GCThreadPool()

    def getScript(self):
        # Monitoring script shipped to run alongside the job payload
        yield pathShare('mon.dashboard.sh', pkg='grid_control_cms')

    def getTaskConfig(self):
        # Expose dashboard settings to the job environment
        result = {
            'TASK_NAME': self._taskname,
            'DB_EXEC': self._app,
            'DATASETNICK': ''
        }
        result.update(Monitoring.getTaskConfig(self))
        return result

    def getFiles(self):
        # Files shipped with the job for dashboard reporting
        yield pathShare('mon.dashboard.sh', pkg='grid_control_cms')
        for fn in ('DashboardAPI.py', 'Logger.py', 'apmon.py', 'report.py'):
            yield pathShare('..', 'DashboardAPI', fn, pkg='grid_control_cms')

    def _publish(self, jobObj, jobNum, taskId, usermsg):
        """Send one dashboard message (runs inside a worker thread)."""
        # gcID has the form '<prefix>.<backend>.<raw id>'
        (_, backend, rawId) = jobObj.gcID.split('.', 2)
        dashId = '%s_%s' % (jobNum, rawId)
        if 'http' not in jobObj.gcID:
            # Synthesize an https-style id when the backend id is not a URL
            dashId = '%s_https://%s:/%s' % (jobNum, backend, rawId)
        # Merge identification keys with the user message; drop None entries
        msg = mergeDicts([{
            'taskId': taskId,
            'jobId': dashId,
            'sid': rawId
        }] + usermsg)
        DashboardAPI(
            taskId,
            dashId).publish(**filterDict(msg, vF=lambda v: v is not None))

    def _start_publish(self, jobObj, jobNum, desc, message):
        """Resolve the per-job dashboard task id and publish in a background thread."""
        taskId = self._task.substVars('dashboard task id',
                                      self._taskname,
                                      jobNum,
                                      addDict={
                                          'DATASETNICK': ''
                                      }).strip('_')
        self._tp.start_thread(
            'Notifying dashboard about %s of job %d' % (desc, jobNum),
            self._publish, jobObj, jobNum, taskId, message)

    # Called on job submission
    def onJobSubmit(self, wms, jobObj, jobNum):
        token = wms.getAccessToken(jobObj.gcID)
        jobInfo = self._task.getJobConfig(jobNum)
        # NOTE(review): os.environ['LOGNAME'] raises KeyError when LOGNAME is
        # unset (e.g. some batch environments); consider getpass.getuser().
        self._start_publish(
            jobObj, jobNum, 'submission',
            [{
                'user': os.environ['LOGNAME'],
                'GridName': '/CN=%s' % token.getUsername(),
                'CMSUser': token.getUsername(),
                'tool': 'grid-control',
                'JSToolVersion': getVersion(),
                'SubmissionType': 'direct',
                'tool_ui': os.environ.get('HOSTNAME', ''),
                'application': jobInfo.get('SCRAM_PROJECTVERSION', self._app),
                'exe': jobInfo.get('CMSSW_EXEC', 'shellscript'),
                'taskType': self._tasktype,
                'scheduler': wms.getObjectName(),
                'vo': token.getGroup(),
                'nevtJob': jobInfo.get('MAX_EVENTS', 0),
                'datasetFull': jobInfo.get('DATASETPATH', 'none')
            }])

    # Called on job status update and output
    def _updateDashboard(self, wms, jobObj, jobNum, data, addMsg):
        # Translate status into dashboard status message
        statusDashboard = self._statusMap.get(jobObj.state, 'PENDING')
        self._start_publish(jobObj, jobNum, 'status', [{
            'StatusValue':
            statusDashboard,
            'StatusValueReason':
            data.get('reason', statusDashboard).upper(),
            'StatusEnterTime':
            data.get('timestamp',
                     time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
            'StatusDestination':
            data.get('dest', '')
        }, addMsg])

    def onJobUpdate(self, wms, jobObj, jobNum, data):
        # NOTE(review): jobObj is forwarded as the 'data' mapping -- assumes
        # Job provides a dict-like get(); confirm against the Job class.
        self._updateDashboard(wms, jobObj, jobNum, jobObj, {})

    def onJobOutput(self, wms, jobObj, jobNum, retCode):
        self._updateDashboard(wms, jobObj, jobNum, jobObj,
                              {'ExeExitCode': retCode})

    def onFinish(self):
        # Bounded wait for outstanding notification threads before shutdown
        self._tp.wait_and_drop(self._runningMax)
class DashboardLocal(LocalEventHandler):
    """Local event handler that publishes job lifecycle events to the CMS dashboard."""
    alias_list = ['dashboard']
    config_section_list = LocalEventHandler.config_section_list + ['dashboard']

    def __init__(self, config, name, task):
        """Read dashboard options and prepare the notification thread pool."""
        LocalEventHandler.__init__(self, config, name, task)
        # Fallback application name reported to the dashboard
        self._app = config.get('application', 'shellscript', on_change=None)
        # Bounded wait for pending notifications at workflow shutdown
        self._dashboard_timeout = config.get_time('dashboard timeout',
                                                  5,
                                                  on_change=None)
        self._tasktype = config.get('task', 'analysis', on_change=None)
        # Task id template; variables are substituted per job
        self._taskname = config.get('task name',
                                    '@GC_TASK_ID@_@DATASETNICK@',
                                    on_change=None)
        # Job state -> dashboard status string (anything else -> 'PENDING')
        self._map_status_job2dashboard = {
            Job.DONE: 'DONE',
            Job.FAILED: 'DONE',
            Job.SUCCESS: 'DONE',
            Job.RUNNING: 'RUNNING',
            Job.ABORTED: 'ABORTED',
            Job.CANCELLED: 'CANCELLED'
        }
        self._tp = GCThreadPool()

    def on_job_output(self, wms, job_obj, jobnum, exit_code):
        # Called when job output is retrieved; reports the executable exit code.
        # NOTE(review): job_obj is passed both as the job and as the 'data'
        # mapping read via data.get(...) -- assumes Job has a dict-like get().
        self._update_dashboard(wms, job_obj, jobnum, job_obj,
                               {'ExeExitCode': exit_code})

    def on_job_submit(self, wms, job_obj, jobnum):
        # Called on job submission
        token = wms.get_access_token(job_obj.gc_id)
        job_config_dict = self._task.get_job_dict(jobnum)
        # Publish static submission metadata (user, tool, scheduler, dataset)
        self._start_publish(job_obj, jobnum, 'submission', [{
            'user':
            get_local_username(),
            'GridName':
            '/CN=%s' % token.get_user_name(),
            'CMSUser':
            token.get_user_name(),
            'tool':
            'grid-control',
            'JSToolVersion':
            get_version(),
            'SubmissionType':
            'direct',
            'tool_ui':
            os.environ.get('HOSTNAME', ''),
            'application':
            job_config_dict.get('SCRAM_PROJECTVERSION', self._app),
            'exe':
            job_config_dict.get('CMSSW_EXEC', 'shellscript'),
            'taskType':
            self._tasktype,
            'scheduler':
            wms.get_object_name(),
            'vo':
            token.get_group(),
            'nevtJob':
            job_config_dict.get('MAX_EVENTS', 0),
            'datasetFull':
            job_config_dict.get('DATASETPATH', 'none')
        }])

    def on_job_update(self, wms, job_obj, jobnum, data):
        # Called on job status update; 'data' is ignored and job_obj forwarded
        self._update_dashboard(wms, job_obj, jobnum, job_obj, {})

    def on_workflow_finish(self):
        # Bounded wait for outstanding notification threads before exit
        self._tp.wait_and_drop(self._dashboard_timeout)

    def _publish(self, job_obj, jobnum, task_id, usermsg):
        """Send one dashboard message (runs inside a worker thread)."""
        # gc_id has the form '<prefix>.<backend>.<wms id>'
        (_, backend, wms_id) = job_obj.gc_id.split('.', 2)
        dash_id = '%s_%s' % (jobnum, wms_id)
        if 'http' not in job_obj.gc_id:
            # Synthesize an https-style id when the backend id is not a URL
            dash_id = '%s_https://%s:/%s' % (jobnum, backend, wms_id)
        # Merge identification keys with user message; drop falsy entries
        msg = dict_union({
            'taskId': task_id,
            'jobId': dash_id,
            'sid': wms_id
        }, *usermsg)
        DashboardAPI(
            task_id,
            dash_id).publish(**filter_dict(msg, value_filter=identity))

    def _start_publish(self, job_obj, jobnum, desc, msg):
        """Resolve the per-job dashboard task id and publish in a daemon thread."""
        task_id = self._task.substitute_variables('dashboard task id',
                                                  self._taskname,
                                                  jobnum,
                                                  additional_var_dict={
                                                      'DATASETNICK': ''
                                                  }).strip('_')
        self._tp.start_daemon(
            'Notifying dashboard about %s of job %d' % (desc, jobnum),
            self._publish, job_obj, jobnum, task_id, msg)

    def _update_dashboard(self, wms, job_obj, jobnum, data, add_dict):
        # Called on job status update and output
        # Translate status into dashboard status message
        status_dashboard = self._map_status_job2dashboard.get(
            job_obj.state, 'PENDING')
        self._start_publish(job_obj, jobnum, 'status', [{
            'StatusValue':
            status_dashboard,
            'StatusValueReason':
            data.get('reason', status_dashboard).upper(),
            'StatusEnterTime':
            data.get('timestamp',
                     time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
            'StatusDestination':
            job_obj.get_job_location()
        }, add_dict])
class ScriptMonitoring(Monitoring):
    """Legacy (camelCase) monitoring plugin that runs user scripts on job events."""
    alias = ['scripts']
    configSections = EventHandler.configSections + ['scripts']

    def __init__(self, config, name, task):
        """Read the script hook settings and prepare the background thread pool."""
        Monitoring.__init__(self, config, name, task)
        # silent=True runs scripts via os.system; False captures/logs output
        self._silent = config.getBool('silent', True, onChange=None)
        # One configurable hook command per lifecycle event ('' disables it)
        self._evtSubmit = config.getCommand('on submit', '', onChange=None)
        self._evtStatus = config.getCommand('on status', '', onChange=None)
        self._evtOutput = config.getCommand('on output', '', onChange=None)
        self._evtFinish = config.getCommand('on finish', '', onChange=None)
        # Maximum script runtime; also the final drain timeout
        self._runningMax = config.getTime('script runtime', 5, onChange=None)
        self._workPath = config.getWorkPath()
        self._tp = GCThreadPool()

    # Get both task and job config / state dicts
    def _scriptThread(self, script, jobNum=None, jobObj=None, allDict=None):
        try:
            tmp = {}
            if jobNum is not None:
                tmp.update(self._task.getSubmitInfo(jobNum))
            if jobObj is not None:
                tmp.update(jobObj.getAll())
            tmp['WORKDIR'] = self._workPath
            tmp.update(self._task.getTaskConfig())
            if jobNum is not None:
                tmp.update(self._task.getJobConfig(jobNum))
                # NOTE(review): getSubmitInfo merged a second time; looks
                # redundant with the first update above -- confirm.
                tmp.update(self._task.getSubmitInfo(jobNum))
            tmp.update(allDict or {})
            # Export everything with a GC_ prefix -- mutates the live env
            for key, value in tmp.items():
                if not key.startswith('GC_'):
                    key = 'GC_' + key
                os.environ[key] = str(value)

            script = self._task.substVars(script, jobNum, tmp)
            if not self._silent:
                # Capture the script output and forward it to the logger
                proc = LocalProcess(script)
                self._log.info(proc.get_output(timeout=self._runningMax))
            else:
                os.system(script)
        except Exception:
            self._log.exception('Error while running user script!')

    def _runInBackground(self, script, jobNum=None, jobObj=None, addDict=None):
        # Start the hook in a background thread; '' disables the hook
        if script != '':
            self._tp.start_thread('Running monitoring script %s' % script,
                                  self._scriptThread, script, jobNum, jobObj,
                                  addDict)

    # Called on job submission
    def onJobSubmit(self, wms, jobObj, jobNum):
        self._runInBackground(self._evtSubmit, jobNum, jobObj)

    # Called on job status update
    def onJobUpdate(self, wms, jobObj, jobNum, data):
        self._runInBackground(self._evtStatus, jobNum, jobObj,
                              {'STATUS': Job.enum2str(jobObj.state)})

    # Called on job status update
    def onJobOutput(self, wms, jobObj, jobNum, retCode):
        self._runInBackground(self._evtOutput, jobNum, jobObj,
                              {'RETCODE': retCode})

    # Called at the end of the task
    def onTaskFinish(self, nJobs):
        self._runInBackground(self._evtFinish, addDict={'NJOBS': nJobs})
        # Bounded wait for still-running hook scripts
        self._tp.wait_and_drop(self._runningMax)
# --- Example #16 (score: 0) --- scraped snippet separator
class DashBoard(Monitoring):
	"""Legacy (camelCase) monitoring plugin publishing job events to the CMS dashboard."""
	configSections = Monitoring.configSections + ['dashboard']

	def __init__(self, config, name, task):
		"""Read dashboard options and prepare the notification thread pool."""
		Monitoring.__init__(self, config, name, task)
		jobDesc = task.getDescription(None) # TODO: use the other variables for monitoring
		# Fallback application name reported to the dashboard
		self._app = config.get('application', 'shellscript', onChange = None)
		# Bounded wait for pending notifications at shutdown
		self._runningMax = config.getTime('dashboard timeout', 5, onChange = None)
		# Default task type comes from the task description when available
		self._tasktype = config.get('task', jobDesc.jobType or 'analysis', onChange = None)
		# Task id template; variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', onChange = None)
		# Job state -> dashboard status string (anything else -> 'PENDING')
		self._statusMap = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		# Notifications are sent asynchronously in background threads
		self._tp = GCThreadPool()


	def getScript(self):
		# Monitoring script shipped to run alongside the job payload
		yield pathShare('mon.dashboard.sh', pkg = 'grid_control_cms')


	def getTaskConfig(self):
		# Expose dashboard settings to the job environment
		result = {'TASK_NAME': self._taskname, 'DB_EXEC': self._app, 'DATASETNICK': ''}
		result.update(Monitoring.getTaskConfig(self))
		return result


	def getFiles(self):
		# Files shipped with the job for dashboard reporting
		yield pathShare('mon.dashboard.sh', pkg = 'grid_control_cms')
		for fn in ('DashboardAPI.py', 'Logger.py', 'apmon.py', 'report.py'):
			yield pathShare('..', 'DashboardAPI', fn, pkg = 'grid_control_cms')


	def _publish(self, jobObj, jobNum, taskId, usermsg):
		"""Send one dashboard message (runs inside a worker thread)."""
		# gcID has the form '<prefix>.<backend>.<raw id>'
		(_, backend, rawId) = jobObj.gcID.split('.', 2)
		dashId = '%s_%s' % (jobNum, rawId)
		if 'http' not in jobObj.gcID:
			# Synthesize an https-style id when the backend id is not a URL
			dashId = '%s_https://%s:/%s' % (jobNum, backend, rawId)
		# Merge identification keys with the user message; drop None entries
		msg = mergeDicts([{'taskId': taskId, 'jobId': dashId, 'sid': rawId}] + usermsg)
		DashboardAPI(taskId, dashId).publish(**filterDict(msg, vF = lambda v: v is not None))


	def _start_publish(self, jobObj, jobNum, desc, message):
		"""Resolve the per-job dashboard task id and publish in a background thread."""
		taskId = self._task.substVars('dashboard task id', self._taskname, jobNum,
			addDict = {'DATASETNICK': ''}).strip('_')
		self._tp.start_thread('Notifying dashboard about %s of job %d' % (desc, jobNum),
			self._publish, jobObj, jobNum, taskId, message)


	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		import getpass  # local import: the file's import block is outside this view
		token = wms.getAccessToken(jobObj.gcID)
		jobInfo = self._task.getJobConfig(jobNum)
		# getpass.getuser() checks LOGNAME first (so behavior is unchanged when
		# it is set) but falls back to USER/LNAME/USERNAME and the pwd database,
		# avoiding the KeyError that os.environ['LOGNAME'] raised when unset.
		self._start_publish(jobObj, jobNum, 'submission', [{
			'user': getpass.getuser(), 'GridName': '/CN=%s' % token.getUsername(), 'CMSUser': token.getUsername(),
			'tool': 'grid-control', 'JSToolVersion': getVersion(),
			'SubmissionType':'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': jobInfo.get('SCRAM_PROJECTVERSION', self._app),
			'exe': jobInfo.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.getObjectName(), 'vo': token.getGroup(),
			'nevtJob': jobInfo.get('MAX_EVENTS', 0),
			'datasetFull': jobInfo.get('DATASETPATH', 'none')}])


	# Called on job status update and output
	def _updateDashboard(self, wms, jobObj, jobNum, data, addMsg):
		# Translate status into dashboard status message
		statusDashboard = self._statusMap.get(jobObj.state, 'PENDING')
		self._start_publish(jobObj, jobNum, 'status', [{'StatusValue': statusDashboard,
			'StatusValueReason': data.get('reason', statusDashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': data.get('dest', '') }, addMsg])


	def onJobUpdate(self, wms, jobObj, jobNum, data):
		# NOTE(review): jobObj is forwarded as the 'data' mapping -- assumes
		# Job provides a dict-like get(); confirm against the Job class.
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {})


	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {'ExeExitCode': retCode})


	def onFinish(self):
		# Bounded wait for outstanding notification threads before shutdown
		self._tp.wait_and_drop(self._runningMax)