Example #1
	def display(self):
		taskConfig = self._task.getTaskConfig()
		header = lzip(taskConfig, taskConfig)
		header.extend(imap(lambda key: (key, '<%s>' % key), self._task.getTransientVars()))
		variables = set()
		entries = []
		for jobNum in self._jobDB.getJobs(self._selector):
			jobConfig = self._task.getJobConfig(jobNum)
			variables.update(jobConfig)
			entry = dict(taskConfig)
			entry.update(self._task.getTransientVars())
			entry.update(jobConfig)
			entries.append(entry)
		printTabular(sorted(header + lzip(variables, variables)), entries)
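Many of the examples on this page build table headers with the pattern lzip(x, x). A minimal sketch of what this presumably does, assuming lzip from grid-control's python_compat is simply the list-returning variant of zip:

def lzip(*args):
	# assumed equivalent of grid-control's python_compat.lzip
	return list(zip(*args))

task_config = {'DATASET': '/some/dataset', 'NICK': 'test'}  # hypothetical task variables
# zipping a dict with itself pairs each key with itself, giving (key, label) header tuples
print(lzip(task_config, task_config))
# e.g. [('DATASET', 'DATASET'), ('NICK', 'NICK')]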
Example #2
	def _submit_jobs(self, jobnum_list, task):
		# submit_jobs: Submit a number of jobs and yield (jobnum, WMS ID, other data) sequentially
		# >>jobnum: internal ID of the Job
		# JobNum is linked to the actual *task* here
		(jdl_fn, submit_jdl_fn) = self._submit_jobs_prepare(jobnum_list, task)
		try:
			# submit all jobs simultaneously and temporarily store verbose (ClassAd) output
			activity = Activity('queuing jobs at scheduler')
			submit_args = ' -verbose -batch-name ' + task.get_description().task_name + ' ' + submit_jdl_fn
			proc = self._proc_factory.logged_execute(self._submit_exec, submit_args)

			# extract the Condor ID (WMS ID) of the jobs from output ClassAds
			jobnum_gc_id_list = []
			for line in proc.iter():
				if 'GridControl_GCIDtoWMSID' in line:
					jobnum_wms_id = line.split('=')[1].strip(' "\n').split('@')
					jobnum, wms_id = int(jobnum_wms_id[0]), jobnum_wms_id[1].strip()
					# Condor creates a default job then overwrites settings on any subsequent job
					# i.e. skip every second, but better be sure
					if (not jobnum_gc_id_list) or (jobnum not in lzip(*jobnum_gc_id_list)[0]):
						jobnum_gc_id_list.append((jobnum, self._create_gc_id(wms_id)))

			exit_code = proc.wait()
			activity.finish()
			if (exit_code != 0) or (len(jobnum_gc_id_list) < len(jobnum_list)):
				if not self._explain_error(proc, exit_code):
					self._log.error('Submitted %4d jobs of %4d expected',
						len(jobnum_gc_id_list), len(jobnum_list))
					proc.log_error(self._error_log_fn, jdl=jdl_fn)
		finally:
			remove_files([jdl_fn])

		for (jobnum, gc_id) in jobnum_gc_id_list:
			yield (jobnum, gc_id, {})
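The duplicate check jobnum not in lzip(*jobnum_gc_id_list)[0] above relies on lzip transposing a list of pairs. A small sketch, again assuming lzip is list(zip(...)):

def lzip(*args):
	return list(zip(*args))

jobnum_gc_id_list = [(1, 'gc-id-1234.0'), (2, 'gc-id-1234.1')]  # hypothetical entries
# transposing the pair list yields all job numbers seen so far at index 0
print(lzip(*jobnum_gc_id_list)[0])  # (1, 2)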
Example #3
def draw_pie(ax, breakdown, pos, size, piecolor = None):
	piecolor = piecolor or ['red', 'orange', 'green', 'blue', 'purple']
	breakdown = [0] + list(numpy.cumsum(breakdown)* 1.0 / sum(breakdown))
	for i in irange(len(breakdown)-1):
		x = [0] + numpy.cos(numpy.linspace(2 * math.pi * breakdown[i], 2 * math.pi * breakdown[i+1], 20)).tolist()
		y = [0] + numpy.sin(numpy.linspace(2 * math.pi * breakdown[i], 2 * math.pi * breakdown[i+1], 20)).tolist()
		ax.scatter(pos[0], pos[1], marker=(lzip(x, y), 0), s = size, facecolor = piecolor[i % len(piecolor)])
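The cumulative-sum line above turns the raw breakdown into slice boundaries as fractions of the full circle; a short sketch (assuming numpy is available):

import numpy

breakdown = [3, 1, 1]  # hypothetical job counts per category
edges = [0] + list(numpy.cumsum(breakdown) * 1.0 / sum(breakdown))
print(edges)  # boundaries 0, 0.6, 0.8, 1.0 - each adjacent pair spans one pie slice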
Example #4
 def display(self):
     taskConfig = self._task.getTaskConfig()
     header = lzip(taskConfig, taskConfig)
     header.extend(
         imap(lambda key: (key, '<%s>' % key),
              self._task.getTransientVars()))
     variables = set()
     entries = []
     for jobNum in self._jobDB.getJobs(self._selector):
         jobConfig = self._task.getJobConfig(jobNum)
         variables.update(jobConfig)
         entry = dict(taskConfig)
         entry.update(self._task.getTransientVars())
         entry.update(jobConfig)
         entries.append(entry)
     printTabular(sorted(header + lzip(variables, variables)), entries)
Example #5
    def show_report(self, job_db, jobnum_list):
        state_map = dict(self._state_map)

        def _transform(data, label, level):
            if None in data:
                total = data.pop(None)
                if len(data) > 1:
                    for result in self._get_entry(state_map, total, ['Total']):
                        yield result
                    yield '='
            for idx, entry in enumerate(sorted(data)):
                if level == 1:
                    for result in self._get_entry(state_map, data[entry],
                                                  [entry] + label):
                        yield result
                else:
                    for result in _transform(data[entry], [entry] + label,
                                             level - 1):
                        yield result
                if idx != len(data) - 1:
                    yield '-'

        stats = self._get_hierachical_stats_dict(job_db, jobnum_list)
        displace_states_list = lmap(itemgetter(1), self._state_map)
        header = [('', 'Category')] + lzip(displace_states_list,
                                           displace_states_list)
        self._show_table(header,
                         _transform(stats, [], len(self._idx_list)),
                         align_str='l' + 'c' * len(state_map),
                         fmt_dict={'': lambda x: str.join(' ', x)})
Example #6
	def _submit_jobs(self, jobnum_list, task):
		# submit_jobs: Submit a number of jobs and yield (jobnum, WMS ID, other data) sequentially
		# >>jobnum: internal ID of the Job
		# JobNum is linked to the actual *task* here
		(jdl_fn, submit_jdl_fn) = self._submit_jobs_prepare(jobnum_list, task)
		try:
			# submit all jobs simultaneously and temporarily store verbose (ClassAd) output
			activity = Activity('queuing jobs at scheduler')
			submit_args = ' -verbose -batch-name ' + task.get_description().task_name + ' ' + submit_jdl_fn
			proc = self._proc_factory.logged_execute(self._submit_exec, submit_args)

			# extract the Condor ID (WMS ID) of the jobs from output ClassAds
			jobnum_gc_id_list = []
			for line in proc.iter():
				if 'GridControl_GCIDtoWMSID' in line:
					jobnum_wms_id = line.split('=')[1].strip(' "\n').split('@')
					jobnum, wms_id = int(jobnum_wms_id[0]), jobnum_wms_id[1].strip()
					# Condor creates a default job then overwrites settings on any subsequent job
					# i.e. skip every second, but better be sure
					if (not jobnum_gc_id_list) or (jobnum not in lzip(*jobnum_gc_id_list)[0]):
						jobnum_gc_id_list.append((jobnum, self._create_gc_id(wms_id)))

			exit_code = proc.wait()
			activity.finish()
			if (exit_code != 0) or (len(jobnum_gc_id_list) < len(jobnum_list)):
				if not self._explain_error(proc, exit_code):
					self._log.error('Submitted %4d jobs of %4d expected',
						len(jobnum_gc_id_list), len(jobnum_list))
					proc.log_error(self._error_log_fn, jdl=jdl_fn)
		finally:
			remove_files([jdl_fn])

		for (jobnum, gc_id) in jobnum_gc_id_list:
			yield (jobnum, gc_id, {})
Example #7
    def _parseParameterTuple(self, varName, tupleValue, tupleType, varType,
                             varIndex):
        if tupleType == 'tuple':
            tupleDelimeter = self.get(self._getParameterOption(varName),
                                      'delimeter', ',')
            tupleStrings = lmap(
                str.strip,
                utils.split_advanced(tupleValue, lambda tok: tok in ' \n',
                                     lambda tok: False))
            tupleList = lmap(lambda t: parseTuple(t, tupleDelimeter),
                             tupleStrings)
        elif tupleType == 'binning':
            tupleList = lzip(tupleValue.split(), tupleValue.split()[1:])

        result = []
        for tupleEntry in tupleList:
            try:
                tmp = self._parseParameter(varName, tupleEntry[varIndex],
                                           varType)
            except Exception:
                raise ConfigError('Unable to parse %r' % repr(
                    (tupleEntry, tupleStrings)))
            if isinstance(tmp, list):
                if len(tmp) != 1:
                    raise ConfigError(
                        '[Variable: %s] Tuple entry (%s) expands to multiple variable entries (%s)!'
                        % (varName, tupleEntry[varIndex], tmp))
                result.append(tmp[0])
            else:
                result.append(tmp)
        return result
Example #8
def replaceDict(result, allVars, varMapping=None):
    for (virtual, real) in QM(varMapping, varMapping,
                              lzip(allVars.keys(), allVars.keys())):
        for delim in ['@', '__']:
            result = result.replace(delim + virtual + delim,
                                    str(allVars.get(real, '')))
    return result
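A standalone sketch of the substitution performed by replaceDict (QM is presumably grid-control's ternary helper, lzip the list-returning zip): with no explicit mapping, every variable name maps to itself, and both @NAME@ and __NAME__ placeholders are replaced.

def replace_placeholders(result, all_vars):
	# hypothetical standalone equivalent of replaceDict without a varMapping
	for name in all_vars:
		for delim in ('@', '__'):
			result = result.replace(delim + name + delim, str(all_vars.get(name, '')))
	return result

print(replace_placeholders('file_@NICK@.__DATE__.txt', {'NICK': 'test', 'DATE': '2016-01-01'}))
# -> 'file_test.2016-01-01.txt'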
Example #9
    def __init__(self, head, data, fmt=None, top=True):
        self.table = """
<style type="text/css">
	table {font-size:12px;color:#333333;border-width: 1px;border-color: #7799aa;border-collapse: collapse;}
	th {font-size:12px;background-color:#aacccc;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;text-align:left;}
	tr {background-color:#ffffff;}
	td {font-size:12px;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;}
</style>"""
        fmt = fmt or {}
        lookupDict = lmap(
            lambda id_name: (id_name[0], fmt.get(id_name[0], str)), head)
        headerList = lmap(lambda id_name: '<th>%s</th>' % id_name[1], head)

        def entryList(entry):
            return lmap(
                lambda id_fmt: '<td>%s</td>' % id_fmt[1](entry.get(id_fmt[0])),
                lookupDict)

        rowList = [headerList] + lmap(entryList, data)
        if not top:
            rowList = lzip(*rowList)
        rows = lmap(lambda row: '\t<tr>%s</tr>\n' % str.join('', row), rowList)
        if top:
            widthStr = 'width:100%;'
        else:
            widthStr = ''
        self.table += '<table style="%s" border="1">\n%s</table>' % (
            widthStr, str.join('', rows))
Example #10
    def jobs(self, *args, **kw):
        result = '<body>'
        result += str(CPProgressBar(0, min(100, self.counter), 100, 300))
        if 'job' in kw:
            jobNum = int(kw['job'])
            info = self.task.getJobConfig(jobNum)
            result += str(
                TabularHTML(lzip(sorted(info), sorted(info)), [info],
                            top=False))

        def getJobObjs():
            for jobNum in self.jobMgr.jobDB.getJobs():
                result = self.jobMgr.jobDB.get(jobNum).__dict__
                result['jobNum'] = jobNum
                result.update(result['dict'])
                yield result

        fmtTime = lambda t: time.strftime('%Y-%m-%d %T', time.localtime(t))
        result += str(
            TabularHTML(
                [('jobNum', 'Job'), ('state', 'Status'),
                 ('attempt', 'Attempt'), ('wmsId', 'WMS ID'),
                 ('dest', 'Destination'), ('submitted', 'Submitted')],
                getJobObjs(),
                fmt={
                    'jobNum': lambda x: '<a href="jobs?job=%s">%s</a>' %
                    (x, x),
                    'state': Job.enum2str,
                    'submitted': fmtTime
                },
                top=True))
        result += '</body>'
        return result
Example #11
		def getStatusDirect(wmsId):
			wrStatus.getStatusDirect(wmsId, 0)
			err, apiMsg = wrStatus.get_error()
			if err:
				raise BackendError(apiMsg)
			info = wrStatus.loadStatus()
			return lzip(imap(str.lower, jobStatus.states_names), info[0:jobStatus.ATTR_MAX])
Example #12
    def jobs(self, *args, **kw):
        element_list = [CPProgressBar(0, min(100, self._counter), 100, 300)]
        if 'job' in kw:
            jobnum = int(kw['job'])
            info = self._workflow.task.get_job_dict(jobnum)
            element_list.append(
                CPTable(lzip(sorted(info), sorted(info)), [info], pivot=False))

        def _fmt_time(value):
            return time.strftime('%Y-%m-%d %T', time.localtime(value))

        def _iter_job_objs():
            for jobnum in self._workflow.job_manager.job_db.get_job_list():
                result = self._workflow.job_manager.job_db.get_job_transient(
                    jobnum).__dict__
                result['jobnum'] = jobnum
                result.update(result['dict'])
                yield result

        header_list = [('jobnum', 'Job'), ('state', 'Status'),
                       ('attempt', 'Attempt'), ('gc_id', 'WMS ID'),
                       ('SITE', 'Site'), ('QUEUE', 'Queue'),
                       ('submitted', 'Submitted')]
        fmt_dict = {
            'jobnum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
            'state': Job.enum2str,
            'submitted': _fmt_time
        }
        element_list.append(
            CPTable(header_list,
                    _iter_job_objs(),
                    fmt_dict=fmt_dict,
                    pivot=True))
        return _get_html_page(element_list)
Example #13
	def display(self):
		stateMap = dict(self._stateMap)

		def transform(data, label, level):
			if None in data:
				total = data.pop(None)
				if (len(data) > 1):
					for result in self._get_entry(stateMap, total, ['Total']):
						yield result
					yield '='
			for idx, entry in enumerate(sorted(data)):
				if level == 1:
					for result in self._get_entry(stateMap, data[entry], [entry] + label):
						yield result
				else:
					for result in transform(data[entry], [entry] + label, level - 1):
						yield result
				if idx != len(data) - 1:
					yield '-'
		stats = self._getHierachicalStats()
		displayStates = lmap(itemgetter(1), self._stateMap)
		header = [('', 'Category')] + lzip(displayStates, displayStates)
		printTabular(header, transform(stats, [], len(self._idxList)),
			fmtString = 'l' + 'c'*len(stateMap), fmt = {'': lambda x: str.join(' ', x)})
		return 0
Example #14
def replace_with_dict(value, mapping_values, mapping_keys=None):
    mapping_keys = mapping_keys or lzip(mapping_values.keys(),
                                        mapping_values.keys())
    for (virtual, real) in mapping_keys:
        for delim in ['@', '__']:
            value = value.replace(delim + virtual + delim,
                                  str(mapping_values.get(real, '')))
    return value
Example #15
def draw_pie(ax, breakdown, pos, size, piecolor = None):
	piecolor = piecolor or ['red', 'orange', 'green', 'blue', 'purple']
	breakdown = [0] + list(numpy.cumsum(breakdown)* 1.0 / sum(breakdown))
	for i in irange(len(breakdown)-1):
		fracs = numpy.linspace(2 * math.pi * breakdown[i], 2 * math.pi * breakdown[i+1], 20)
		x = [0] + numpy.cos(fracs).tolist()
		y = [0] + numpy.sin(fracs).tolist()
		ax.scatter(pos[0], pos[1], marker=(lzip(x, y), 0), s = size, facecolor = piecolor[i % len(piecolor)])
Example #16
 def _get_status_direct(wms_id):
     wrapper_status.getStatus(wms_id, 0)
     err, api_msg = wrapper_status.get_error()
     if err:
         raise BackendError(api_msg)
     info = wrapper_status.loadStatus()
     return lzip(imap(str.lower, job_status.states_names),
                 info[0:job_status.ATTR_MAX])
Example #17
def display_metadata(dataset_list, block, metadata_key_list, metadata_list, base_header_list=None):
	header_list = [(DataProvider.BlockName, 'Block')] + (base_header_list or []) + \
		lzip(sorted(metadata_key_list), sorted(metadata_key_list))
	for metadata in metadata_list:
		metadata[DataProvider.Dataset] = block[DataProvider.Dataset]
		metadata[DataProvider.BlockName] = block.get(DataProvider.BlockName)
	title = get_title_update_header(dataset_list, header_list)
	ConsoleTable.create(header_list, metadata_list, title=title, pivot=True)
Example #18
 def getStatusDirect(wmsID):
     wrStatus.getStatus(wmsID, 0)
     err, apiMsg = wrStatus.get_error()
     if err:
         raise BackendError(apiMsg)
     info = wrStatus.loadStatus()
     return lzip(imap(str.lower, jobStatus.states_names),
                 info[0:jobStatus.ATTR_MAX])
Example #19
	def show_report(self, job_db, jobnum_list):
		(header_list, job_env_dict_list, vn_set) = ([], [], set())
		for jobnum in jobnum_list:
			job_env_dict = self._task.get_job_dict(jobnum)
			vn_set.update(job_env_dict)
			job_env_dict.update(self._task.get_transient_variables())
			job_env_dict_list.append(job_env_dict)
		header_list.extend(imap(lambda key: (key, '<%s>' % key), self._task.get_transient_variables()))
		self._show_table(sorted(header_list + lzip(vn_set, vn_set)), job_env_dict_list)
Example #20
	def show_report(self, job_db, jobnum_list):
		report_dict_list = []
		for jobnum in jobnum_list:
			job_obj = job_db.get_job_transient(jobnum)
			if job_obj.state != Job.INIT:
				report_dict_list.append({0: jobnum, 1: Job.enum2str(job_obj.state), 2: job_obj.gc_id})
				self._fill_report_dict_list(report_dict_list, job_obj)
		header_list = ['Job', 'Status / Attempt', 'Id / Destination']
		self._show_table(lzip(irange(3), header_list), report_dict_list, 'rcl')
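The header construction lzip(irange(3), header_list) above pairs column indices with their labels (irange is presumably grid-control's range wrapper). A minimal sketch:

def lzip(*args):
	return list(zip(*args))

header_list = ['Job', 'Status / Attempt', 'Id / Destination']
print(lzip(range(3), header_list))
# [(0, 'Job'), (1, 'Status / Attempt'), (2, 'Id / Destination')]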
Example #21
def list_metadata(datasets, blocks):
	print('')
	for block in blocks:
		if len(datasets) > 1:
			print('Dataset: %s' % block[DataProvider.Dataset])
		print('Blockname: %s' % block[DataProvider.BlockName])
		mk_len = max(imap(len, block.get(DataProvider.Metadata, [''])))
		for f in block[DataProvider.FileList]:
			print('%s [%d events]' % (f[DataProvider.URL], f[DataProvider.NEntries]))
			print_metadata(lzip(block.get(DataProvider.Metadata, []), f.get(DataProvider.Metadata, [])), mk_len)
		print('')
Example #22
 def show_report(self, job_db, jobnum_list):
     (header_list, job_env_dict_list, vn_set) = ([], [], set())
     for jobnum in jobnum_list:
         job_env_dict = self._task.get_job_dict(jobnum)
         vn_set.update(job_env_dict)
         job_env_dict.update(self._task.get_transient_variables())
         job_env_dict_list.append(job_env_dict)
     header_list.extend(
         imap(lambda key: (key, '<%s>' % key),
              self._task.get_transient_variables()))
     self._show_table(sorted(header_list + lzip(vn_set, vn_set)),
                      job_env_dict_list)
Example #23
def display_metadata(dataset_list,
                     block,
                     metadata_key_list,
                     metadata_list,
                     base_header_list=None):
    header_list = [(DataProvider.BlockName, 'Block')] + (base_header_list or []) + \
     lzip(sorted(metadata_key_list), sorted(metadata_key_list))
    for metadata in metadata_list:
        metadata[DataProvider.Dataset] = block[DataProvider.Dataset]
        metadata[DataProvider.BlockName] = block.get(DataProvider.BlockName)
    title = get_title_update_header(dataset_list, header_list)
    ConsoleTable.create(header_list, metadata_list, title=title, pivot=True)
Example #24
    def cancelJobs(self, wmsJobIdList):
        if len(wmsJobIdList) == 0:
            raise StopIteration
        self.debugOut("Started canceling: %s" % set(lzip(*wmsJobIdList)[0]))
        self.debugPool()

        wmsIdList = list(self._getRawIDs(wmsJobIdList))
        wmsIdArgument = " ".join(wmsIdList)
        wmsToJobMap = dict(wmsJobIdList)

        activity = utils.ActivityLog('cancelling jobs')
        cancelProcess = self.Pool.LoggedExecute(
            self.cancelExec, '%(jobIDs)s' % {"jobIDs": wmsIdArgument})

        # check if canceling actually worked
        for cancelReturnLine in cancelProcess.iter():
            if (cancelReturnLine != '\n') and ('marked for removal'
                                               in cancelReturnLine):
                try:
                    wmsID = cancelReturnLine.split()[1]
                    wmsIdList.remove(wmsID)
                    wmsID = self._createId(wmsID)
                    jobNum = wmsToJobMap[wmsID]
                    yield (jobNum, wmsID)
                except KeyError:  # mismatch in GC<->Condor mapping
                    self._log.error('Error with canceled condor job %s', wmsID)
                    self._log.error('\tCondor IDs: %s', wmsIdList)
                    self._log.error('\tProcess message: %s',
                                    cancelProcess.getMessage())
                    raise BackendError('Error while cancelling job %s' % wmsID)
            # clean up remote work dir
            if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
                cleanupProcess = self.Pool.LoggedExecute(
                    'rm -rf %s' % self.getWorkdirPath(jobNum))
                self.debugOut("Cleaning up remote workdir:\n	" +
                              cleanupProcess.cmd)
                if cleanupProcess.wait() != 0:
                    if self.explainError(cleanupProcess,
                                         cleanupProcess.wait()):
                        pass
                    else:
                        cleanupProcess.logError(self.errorLog)

        retCode = cancelProcess.wait()
        if retCode != 0:
            if self.explainError(cancelProcess, retCode):
                pass
            else:
                cancelProcess.logError(self.errorLog)
        # clean up if necessary
        activity.finish()
        self._tidyUpWorkingDirectory()
        self.debugFlush()
Example #25
 def show_report(self, job_db, jobnum_list):
     report_dict_list = []
     for jobnum in jobnum_list:
         job_obj = job_db.get_job_transient(jobnum)
         if job_obj.state != Job.INIT:
             report_dict_list.append({
                 0: jobnum,
                 1: Job.enum2str(job_obj.state),
                 2: job_obj.gc_id
             })
             self._fill_report_dict_list(report_dict_list, job_obj)
     header_list = ['Job', 'Status / Attempt', 'Id / Destination']
     self._show_table(lzip(irange(3), header_list), report_dict_list, 'rcl')
Example #26
 def display(self):
     reports = []
     for jobNum in self._jobs:
         jobObj = self._jobDB.getJob(jobNum)
         if not jobObj or (jobObj.state == Job.INIT):
             continue
         reports.append({
             0: jobNum,
             1: Job.enum2str(jobObj.state),
             2: jobObj.gcID
         })
         self._add_details(reports, jobObj)
     utils.printTabular(
         lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']),
         reports, 'rcl')
Example #27
def backend_list(finder_name):
	finder = Plugin.get_class('BackendDiscovery').create_instance(finder_name, gc_create_config())
	item_dict_list = []
	item_key_set = set()
	for item_dict in finder.discover():
		nice_item_dict = {}
		for (key, value) in item_dict.items():
			if isinstance(key, int):
				key = WMS.enum2str(key)
			nice_item_dict[key] = value
			item_key_set.add(key)
		item_dict_list.append(nice_item_dict)
	item_key_set.remove('name')
	item_key_list = sorted(item_key_set)
	ConsoleTable.create([('name', 'Name')] + lzip(item_key_list, item_key_list), item_dict_list)
Example #28
    def _getJobsOutput(self, wmsJobIdList):
        if not len(wmsJobIdList):
            raise StopIteration
        self.debugOut("Started retrieving: %s" % set(lzip(*wmsJobIdList)[0]))

        activity = utils.ActivityLog('retrieving job outputs')
        for wmsId, jobNum in wmsJobIdList:
            sandpath = self.getSandboxPath(jobNum)
            if sandpath is None:
                yield (jobNum, None)
                continue
            # when working with a remote spool schedd, tell condor to return files
            if self.remoteType == PoolType.SPOOL:
                transferProcess = self.Pool.LoggedExecute(
                    self.transferExec,
                    '%(jobID)s' % {"jobID": self._splitId(wmsId)})
                if transferProcess.wait() != 0:
                    if self.explainError(transferProcess,
                                         transferProcess.wait()):
                        pass
                    else:
                        transferProcess.logError(self.errorLog)
            # when working with a remote [gsi]ssh schedd, manually return files
            elif self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
                transferProcess = self.Pool.LoggedCopyFromRemote(
                    self.getWorkdirPath(jobNum), self.getSandboxPath())
                if transferProcess.wait() != 0:
                    if self.explainError(transferProcess,
                                         transferProcess.wait()):
                        pass
                    else:
                        transferProcess.logError(self.errorLog)
                # clean up remote working directory
                cleanupProcess = self.Pool.LoggedExecute(
                    'rm -rf %s' % self.getWorkdirPath(jobNum))
                self.debugOut("Cleaning up remote workdir: JobID %s\n	%s" %
                              (jobNum, cleanupProcess.cmd))
                if cleanupProcess.wait() != 0:
                    if self.explainError(cleanupProcess,
                                         cleanupProcess.wait()):
                        pass
                    else:
                        cleanupProcess.logError(self.errorLog)
            yield (jobNum, sandpath)
        # clean up if necessary
        activity.finish()
        self._tidyUpWorkingDirectory()
        self.debugFlush()
Example #29
	def display(self):
		reports = []
		for jobNum in self._jobs:
			jobObj = self._jobDB.get(jobNum)
			if not jobObj or (jobObj.state == Job.INIT):
				continue
			reports.append({0: jobNum, 1: Job.enum2str(jobObj.state), 2: jobObj.wmsId})
			if utils.verbosity() > 0:
				history = jobObj.history.items()
				history.reverse()
				for at, dest in history:
					if dest != 'N/A':
						reports.append({1: at, 2: ' -> ' + dest})
			elif jobObj.get('dest', 'N/A') != 'N/A':
				reports.append({2: ' -> ' + jobObj.get('dest')})
		utils.printTabular(lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']), reports, 'rcl')
Example #30
	def cancelJobs(self, wmsJobIdList):
		if len(wmsJobIdList) == 0:
			raise StopIteration
		self.debugOut("Started canceling: %s" % set(lzip(*wmsJobIdList)[0]))
		self.debugPool()

		wmsIdList=list(self._getRawIDs(wmsJobIdList))
		wmsIdArgument = " ".join(wmsIdList)
		wmsToJobMap = dict(wmsJobIdList)

		activity = utils.ActivityLog('cancelling jobs')
		cancelProcess = self.Pool.LoggedExecute(self.cancelExec, '%(jobIDs)s' % {"jobIDs" : wmsIdArgument })

		# check if canceling actually worked
		for cancelReturnLine in cancelProcess.iter():
			if ( cancelReturnLine!= '\n' ) and ( 'marked for removal' in cancelReturnLine ):
				try:
					wmsID=cancelReturnLine.split()[1]
					wmsIdList.remove(wmsID)
					wmsID=self._createId(wmsID)
					jobNum=wmsToJobMap[wmsID]
					yield ( jobNum, wmsID)
				except KeyError:	# mismatch in GC<->Condor mapping
					self._log.error('Error with canceled condor job %s', wmsID)
					self._log.error('\tCondor IDs: %s', wmsIdList)
					self._log.error('\tProcess message: %s', cancelProcess.getMessage())
					raise BackendError('Error while cancelling job %s' % wmsID)
			# clean up remote work dir
			if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
				cleanupProcess = self.Pool.LoggedExecute('rm -rf %s' % self.getWorkdirPath(jobNum) )
				self.debugOut("Cleaning up remote workdir:\n	" + cleanupProcess.cmd)
				if cleanupProcess.wait() != 0:
					if self.explainError(cleanupProcess, cleanupProcess.wait()):
						pass
					else:
						cleanupProcess.logError(self.errorLog)

		retCode = cancelProcess.wait()
		if retCode != 0:
			if self.explainError(cancelProcess, retCode):
				pass
			else:
				cancelProcess.logError(self.errorLog)
		# clean up if necessary
		activity.finish()
		self._tidyUpWorkingDirectory()
		self.debugFlush()
Example #31
def backend_list(finder_name):
    finder = Plugin.get_class('BackendDiscovery').create_instance(
        finder_name, gc_create_config())
    item_dict_list = []
    item_key_set = set()
    for item_dict in finder.discover():
        nice_item_dict = {}
        for (key, value) in item_dict.items():
            if isinstance(key, int):
                key = WMS.enum2str(key)
            nice_item_dict[key] = value
            item_key_set.add(key)
        item_dict_list.append(nice_item_dict)
    item_key_set.remove('name')
    item_key_list = sorted(item_key_set)
    ConsoleTable.create([('name', 'Name')] +
                        lzip(item_key_list, item_key_list), item_dict_list)
Example #32
def _draw_pie(numpy, axis, js_dict, pos, size, piecolor=None):
    def _sum(job_class):
        return sum(imap(js_dict.get, job_class.state_list))

    piecolor = piecolor or ['red', 'orange', 'green', 'blue', 'purple']
    breakdown = lmap(_sum, [
        JobClass.FAILING, JobClass.RUNNING, JobClass.SUCCESS, JobClass.DONE,
        JobClass.ATWMS
    ])
    breakdown = [0] + list(numpy.cumsum(breakdown) * 1.0 / sum(breakdown))
    for idx in irange(len(breakdown) - 1):
        fracs = numpy.linspace(2 * math.pi * breakdown[idx],
                               2 * math.pi * breakdown[idx + 1], 20)
        loc_x = [0] + numpy.cos(fracs).tolist()
        loc_y = [0] + numpy.sin(fracs).tolist()
        axis.scatter(pos[0],
                     pos[1],
                     marker=(lzip(loc_x, loc_y), 0),
                     s=size,
                     facecolor=piecolor[idx % len(piecolor)])
Example #33
	def _parseParameterTuple(self, varName, tupleValue, tupleType, varType, varIndex):
		if tupleType == 'tuple':
			tupleDelimeter = self.get(self._getParameterOption(varName), 'delimeter', ',')
			tupleStrings = lmap(str.strip, utils.split_advanced(tupleValue, lambda tok: tok in ' \n', lambda tok: False))
			tupleList = lmap(lambda t: parseTuple(t, tupleDelimeter), tupleStrings)
		elif tupleType == 'binning':
			tupleList = lzip(tupleValue.split(), tupleValue.split()[1:])

		result = []
		for tupleEntry in tupleList:
			try:
				tmp = self._parseParameter(varName, tupleEntry[varIndex], varType)
			except Exception:
				raise ConfigError('Unable to parse %r' % repr((tupleEntry, tupleStrings)))
			if isinstance(tmp, list):
				if len(tmp) != 1:
					raise ConfigError('[Variable: %s] Tuple entry (%s) expands to multiple variable entries (%s)!' % (varName, tupleEntry[varIndex], tmp))
				result.append(tmp[0])
			else:
				result.append(tmp)
		return result
Example #34
	def _getJobsOutput(self, wmsJobIdList):
		if not len(wmsJobIdList):
			raise StopIteration
		self.debugOut("Started retrieving: %s" % set(lzip(*wmsJobIdList)[0]))

		activity = Activity('retrieving job outputs')
		for gcID, jobNum in wmsJobIdList:
			sandpath = self.getSandboxPath(jobNum)
			if sandpath is None:
				yield (jobNum, None)
				continue
			# when working with a remote spool schedd, tell condor to return files
			if self.remoteType == PoolType.SPOOL:
				transferProcess = self.Pool.LoggedExecute(self.transferExec, '%(jobID)s' % {"jobID" : self._splitId(gcID) })
				if transferProcess.wait() != 0:
					if self.explainError(transferProcess, transferProcess.wait()):
						pass
					else:
						transferProcess.logError(self.errorLog)
			# when working with a remote [gsi]ssh schedd, manually return files
			elif self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
				transferProcess = self.Pool.LoggedCopyFromRemote( self.getWorkdirPath(jobNum), self.getSandboxPath())
				if transferProcess.wait() != 0:
					if self.explainError(transferProcess, transferProcess.wait()):
						pass
					else:
						transferProcess.logError(self.errorLog)
				# clean up remote working directory
				cleanupProcess = self.Pool.LoggedExecute('rm -rf %s' % self.getWorkdirPath(jobNum) )
				self.debugOut("Cleaning up remote workdir: JobID %s\n	%s"%(jobNum,cleanupProcess.cmd))
				if cleanupProcess.wait() != 0:
					if self.explainError(cleanupProcess, cleanupProcess.wait()):
						pass
					else:
						cleanupProcess.logError(self.errorLog)
			yield (jobNum, sandpath)
		# clean up if necessary
		activity.finish()
		self._tidyUpWorkingDirectory()
		self.debugFlush()
Example #35
 def display(self):
     reports = []
     for jobNum in self._jobs:
         jobObj = self._jobDB.get(jobNum)
         if not jobObj or (jobObj.state == Job.INIT):
             continue
         reports.append({
             0: jobNum,
             1: Job.enum2str(jobObj.state),
             2: jobObj.wmsId
         })
         if utils.verbosity() > 0:
             history = jobObj.history.items()
             history.reverse()
             for at, dest in history:
                 if dest != 'N/A':
                     reports.append({1: at, 2: ' -> ' + dest})
         elif jobObj.get('dest', 'N/A') != 'N/A':
             reports.append({2: ' -> ' + jobObj.get('dest')})
     utils.printTabular(
         lzip(irange(3), ['Job', 'Status / Attempt', 'Id / Destination']),
         reports, 'rcl')
Example #36
	def __init__(self, head, data, fmt = None, top = True):
		self.table = """
<style type="text/css">
	table {font-size:12px;color:#333333;border-width: 1px;border-color: #7799aa;border-collapse: collapse;}
	th {font-size:12px;background-color:#aacccc;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;text-align:left;}
	tr {background-color:#ffffff;}
	td {font-size:12px;border-width: 1px;padding: 8px;border-style: solid;border-color: #7799aa;}
</style>"""
		fmt = fmt or {}
		lookupDict = lmap(lambda id_name: (id_name[0], fmt.get(id_name[0], str)), head)
		headerList = lmap(lambda id_name: '<th>%s</th>' % id_name[1], head)
		def entryList(entry):
			return lmap(lambda id_fmt: '<td>%s</td>' % id_fmt[1](entry.get(id_fmt[0])), lookupDict)
		rowList = [headerList] + lmap(entryList, data)
		if not top:
			rowList = lzip(*rowList)
		rows = lmap(lambda row: '\t<tr>%s</tr>\n' % str.join('', row), rowList)
		if top:
			widthStr = 'width:100%;'
		else:
			widthStr = ''
		self.table += '<table style="%s" border="1">\n%s</table>' % (widthStr, str.join('', rows))
Example #37
	def show_report(self, job_db, jobnum_list):
		state_map = dict(self._state_map)

		def _transform(data, label, level):
			if None in data:
				total = data.pop(None)
				if len(data) > 1:
					for result in self._get_entry(state_map, total, ['Total']):
						yield result
					yield '='
			for idx, entry in enumerate(sorted(data)):
				if level == 1:
					for result in self._get_entry(state_map, data[entry], [entry] + label):
						yield result
				else:
					for result in _transform(data[entry], [entry] + label, level - 1):
						yield result
				if idx != len(data) - 1:
					yield '-'
		stats = self._get_hierachical_stats_dict(job_db, jobnum_list)
		displace_states_list = lmap(itemgetter(1), self._state_map)
		header = [('', 'Category')] + lzip(displace_states_list, displace_states_list)
		self._show_table(header, _transform(stats, [], len(self._idx_list)),
			align_str='l' + 'c' * len(state_map), fmt_dict={'': lambda x: str.join(' ', x)})
Example #38
    def get_body(self):
        fmt_dict = self._fmt_dict or {}
        lookup_dict = lmap(
            lambda id_name: (id_name[0], fmt_dict.get(id_name[0], str)),
            self._head)
        header_list = lmap(lambda id_name: _tag('th', id_name[1]), self._head)

        def _make_entry_list(entry):
            return lmap(
                lambda id_fmt: _tag('td', id_fmt[1](entry.get(id_fmt[0]))),
                lookup_dict)

        row_list = [header_list] + lmap(_make_entry_list, self._data)
        width_str = 'width:100%;'
        if not self._pivot:
            row_list = lzip(*row_list)
            width_str = ''
        return _tag('table',
                    str.join(
                        '',
                        lmap(lambda row: _tag('tr', str.join('', row)),
                             row_list)),
                    style=width_str,
                    border=1)
Example #39
	def jobs(self, *args, **kw):
		result = '<body>'
		result += str(CPProgressBar(0, min(100, self.counter), 100, 300))
		if 'job' in kw:
			jobNum = int(kw['job'])
			info = self.task.getJobConfig(jobNum)
			result += str(TabularHTML(lzip(sorted(info), sorted(info)), [info], top = False))
		def getJobObjs():
			for jobNum in self.jobMgr.jobDB.getJobs():
				result = self.jobMgr.jobDB.get(jobNum).__dict__
				result['jobNum'] = jobNum
				result.update(result['dict'])
				yield result
		fmtTime = lambda t: time.strftime('%Y-%m-%d %T', time.localtime(t))
		result += str(TabularHTML([
				('jobNum', 'Job'), ('state', 'Status'), ('attempt', 'Attempt'),
				('wmsId', 'WMS ID'), ('dest', 'Destination'), ('submitted', 'Submitted')
			], getJobObjs(),
			fmt = {
				'jobNum': lambda x: '<a href="jobs?job=%s">%s</a>' % (x, x),
				'state': Job.enum2str, 'submitted': fmtTime
			}, top = True))
		result += '</body>'
		return result
Example #40
	if len(args) != 1:
		utils.exitWithUsage(parser.usage('part'))
	splitter = DataSplitter.loadPartitionsForScript(args[0])

	if opts.partition_list_invalid:
		utils.printTabular([(0, 'Job')], partition_invalid(splitter))

	if opts.partition_list is not None:
		if opts.partition_list in ('', 'all'):
			keyStrings = DataSplitter.enumNames
		else:
			keyStrings = opts.partition_list.split(',')
		keyList = lmap(DataSplitter.str2enum, keyStrings)
		if None in keyList:
			logging.warning('Available keys: %r', DataSplitter.enumNames)
		utils.printTabular([('jobNum', 'Job')] + lzip(keyList, keyStrings), partition_list(splitter, keyList))

	if opts.partition_check:
		logging.info('Checking %d jobs...', splitter.getMaxJobs())
		partition_check(splitter)

########################################################
# JOBS

def jobs_reset_attempts(jobDB, selected):
	for jobNum in jobDB.getJobsIter(selected):
		logging.info('Resetting attempts for job %d', jobNum)
		jobObj = jobDB.getJob(jobNum)
		jobObj.attempt = 0
		jobObj.history = {}
		for key in jobObj.dict.keys():
Example #41
    if len(args) != 1:
        utils.exitWithUsage(parser.usage('part'))
    splitter = DataSplitter.loadStateForScript(args[0])

    if opts.partition_list_invalid:
        utils.printTabular([(0, 'Job')], partition_invalid(splitter))

    if opts.partition_list is not None:
        if opts.partition_list:
            keyStrings = opts.partition_list.split(',')
        else:
            keyStrings = DataSplitter.enumNames
        keyList = lmap(DataSplitter.str2enum, keyStrings)
        if None in keyList:
            logging.warning('Available keys: %r', DataSplitter.enumNames)
        utils.printTabular([('jobNum', 'Job')] + lzip(keyList, keyStrings),
                           partition_list(splitter, keyList))

    if opts.partition_check:
        logging.info('Checking %d jobs...', splitter.getMaxJobs())
        partition_check(splitter)

########################################################
# JOBS


def jobs_reset_attempts(jobDB, selected):
    for jobNum in jobDB.getJobsIter(selected):
        logging.info('Resetting attempts for job %d', jobNum)
        jobinfo = jobDB.get(jobNum)
        jobinfo.attempt = 0
Example #42
	def checkJobs(self, wmsJobIdList):
		if len(wmsJobIdList) == 0:
			raise StopIteration
		self.debugOut('Started checking: %s' % set(lzip(*wmsJobIdList)[0]))
		self.debugPool()

		wmsIdList=list(self._getRawIDs(wmsJobIdList))
		wmsIdArgument = ' '.join(wmsIdList)
		wmsToJobMap = dict(wmsJobIdList)

		activity = utils.ActivityLog('fetching job status')
		statusProcess = self.Pool.LoggedExecute(self.statusExec, '%(format)s %(jobIDs)s' % {"jobIDs" : wmsIdArgument, "format" : self.statusReturnFormat })
		activity.finish()

		activity = utils.ActivityLog('checking job status')
		# process all lines of the status executable output
		utils.vprint('querrying condor_q', 2)
		for statusReturnLine in statusProcess.iter():
			try:
				# test if wmsID job was requested, then extract data and remove from check list
				if statusReturnLine.split()[0] in wmsIdList:
					( jobID, wmsID, status, jobinfo ) = self._statusReturnLineRead(statusReturnLine)
					wmsIdList.remove(wmsID)
					yield ( jobID, self._createId(wmsID), status, jobinfo )
			except Exception:
				raise BackendError('Error reading job status info:\n%s' % statusReturnLine)

		# cleanup after final yield
		retCode = statusProcess.wait()
		if retCode != 0:
			if self.explainError(statusProcess, retCode):
				pass
			else:
				statusProcess.logError(self.errorLog, brief=True)
		activity.finish()

		self.debugOut("Remaining after condor_q: %s" % wmsIdList)
		# jobs not in queue have either succeeded or failed - both are considered 'Done' for GC
		# if no additional information is required, consider everything we couldn't find as done
		if retCode == 0:
			for wmsID in list(wmsIdList):
				wmsIdList.remove(wmsID)
				wmsID=self._createId(wmsID)
				yield ( wmsToJobMap[wmsID], wmsID, Job.DONE, {} )
		# TODO: query log on properly configured pool
		# querying the history can be SLOW! only do when necessary and possible
		if False and len(wmsIdList) > 0 and self.remoteType != PoolType.SPOOL:
			utils.vprint('querrying condor_history', 2)
			# querying the history can be VERY slow! Only do so bit by bit if possible
			if self.historyFile:
				historyList = sorted([ "-f "+ file for file in ifilter(os.path.isfile, glob.glob(self.historyFile+"*")) ])
			else:
				historyList=[""]
			# query the history file by file until no more jobs need updating
			for historyFile in historyList:
				if len(wmsIdList) > 0:
					statusArgs = '%(fileQuery)s %(format)s %(jobIDs)s' % {"fileQuery": historyFile, "jobIDs" : " ", "format" : self.statusReturnFormat}
					statusProcess = self.Pool.LoggedExecute(self.historyExec, statusArgs)
					for statusReturnLine in statusProcess.iter():
						# test if line starts with a number and was requested
						try:
							# test if wmsID job was requested, then extract data and remove from check list
							if statusReturnLine.split()[0] in wmsIdList:
								( jobID, wmsID, status, jobinfo ) = self._statusReturnLineRead(statusReturnLine)
								wmsIdList.remove(wmsID)
								yield ( jobID, self._createId(wmsID), status, jobinfo )
						except Exception:
							raise BackendError('Error reading job status info:\n%s' % statusReturnLine)

					# cleanup after final yield
					retCode = statusProcess.wait()
					if retCode != 0:
						if self.explainError(statusProcess, retCode):
							pass
						else:
							statusProcess.logError(self.errorLog, brief=True)
		self.debugFlush()
Example #43
	def submitJobs(self, jobNumListFull, module):
		submitBatch=25
		for index in irange(0, len(jobNumListFull), submitBatch):
			jobNumList=jobNumListFull[index:index+submitBatch]
			self.debugOut("\nStarted submitting: %s" % jobNumList)
			self.debugPool()
			# get the full job config path and basename
			def _getJobCFG(jobNum):
				return os.path.join(self.getSandboxPath(jobNum), 'job_%d.var' % jobNum), 'job_%d.var' % jobNum
			activity = Activity('preparing jobs')
			# construct a temporary JDL for this batch of jobs
			jdlDescriptor, jdlFilePath = tempfile.mkstemp(suffix='.jdl')
			jdlSubmitPath = jdlFilePath
			self.debugOut("Writing temporary jdl to: "+jdlSubmitPath)
			try:
				data = self.makeJDLdata(jobNumList, module)
				utils.safeWrite(os.fdopen(jdlDescriptor, 'w'), data)
			except Exception:
				utils.removeFiles([jdlFilePath])
				raise BackendError('Could not write jdl data to %s.' % jdlFilePath)

			# create the _jobconfig.sh file containing the actual data
			for jobNum in jobNumList:
				try:
					self._writeJobConfig(_getJobCFG(jobNum)[0], jobNum, module, {})
				except Exception:
					raise BackendError('Could not write _jobconfig data for %s.' % jobNum)

			self.debugOut("Copying to remote")
			# copy infiles to ssh/gsissh remote pool if required
			if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
				activity = Activity('preparing remote scheduler')
				self.debugOut("Copying to sandbox")
				workdirBase = self.getWorkdirPath()
				# TODO: check whether shared remote files already exist and copy otherwise
				for _, fileSource, fileTarget in self._getSandboxFilesIn(module):
					copyProcess = self.Pool.LoggedCopyToRemote(fileSource, os.path.join(workdirBase, fileTarget))
					if copyProcess.wait() != 0:
						if self.explainError(copyProcess, copyProcess.wait()):
							pass
						else:
							copyProcess.logError(self.errorLog, brief=True)
					self.debugFlush()
				# copy job config files
				self.debugOut("Copying job configs")
				for jobNum in jobNumList:
					fileSource, fileTarget = _getJobCFG(jobNum)
					copyProcess = self.Pool.LoggedCopyToRemote(fileSource, os.path.join(self.getWorkdirPath(jobNum), fileTarget))
					if copyProcess.wait() != 0:
						if self.explainError(copyProcess, copyProcess.wait()):
							pass
						else:
							copyProcess.logError(self.errorLog, brief=True)
					self.debugFlush()
				# copy jdl
				self.debugOut("Copying jdl")
				jdlSubmitPath = os.path.join(workdirBase, os.path.basename(jdlFilePath))
				copyProcess = self.Pool.LoggedCopyToRemote(jdlFilePath, jdlSubmitPath )
				if copyProcess.wait() != 0:
					if self.explainError(copyProcess, copyProcess.wait()):
						pass
					else:
						copyProcess.logError(self.errorLog, brief=True)
				self.debugFlush()
				# copy proxy
				for authFile in self._token.getAuthFiles():
					self.debugOut("Copying proxy")
					copyProcess = self.Pool.LoggedCopyToRemote(authFile, os.path.join(self.getWorkdirPath(), os.path.basename(authFile)))
					if copyProcess.wait() != 0:
						if self.explainError(copyProcess, copyProcess.wait()):
							pass
						else:
							copyProcess.logError(self.errorLog, brief=True)
					self.debugFlush()


			self.debugOut("Starting jobs")
			try:
				# submit all jobs simultaneously and temporarily store verbose (ClassAd) output
				activity = Activity('queuing jobs at scheduler')
				proc = self.Pool.LoggedExecute(self.submitExec, ' -verbose %(JDL)s' % { "JDL": jdlSubmitPath })

				self.debugOut("AAAAA")
				# extract the Condor ID (WMS ID) of the jobs from output ClassAds
				wmsJobIdList = []
				for line in proc.iter():
					if "GridControl_GCIDtoWMSID" in line:
						GCWMSID=line.split('=')[1].strip(' "\n').split('@')
						GCID,WMSID=int(GCWMSID[0]),GCWMSID[1].strip()
						# Condor creates a default job then overwrites settings on any subsequent job - i.e. skip every second, but better be sure
						if ( not wmsJobIdList ) or ( GCID not in lzip(*wmsJobIdList)[0] ):
							wmsJobIdList.append((self._createId(WMSID),GCID))
					if "GridControl_GCtoWMSID" in line:
						self.debugOut("o : %s" % line)
						self.debugOut("o : %s" % wmsJobIdList)

				retCode = proc.wait()
				activity.finish()
				if (retCode != 0) or ( len(wmsJobIdList) < len(jobNumList) ):
					if self.explainError(proc, retCode):
						pass
					else:
						self._log.error('Submitted %4d jobs of %4d expected', len(wmsJobIdList), len(jobNumList))
						proc.logError(self.errorLog, jdl = jdlFilePath)
			finally:
				utils.removeFiles([jdlFilePath])
			self.debugOut("Done Submitting")

			# yield the (jobNum, WMS ID, other data) of each job successively
			for index in irange(len(wmsJobIdList)):
				yield (wmsJobIdList[index][1], wmsJobIdList[index][0], {} )
			self.debugOut("Yielded submitted job")
			self.debugFlush()
Example #44
    def checkJobs(self, wmsJobIdList):
        if len(wmsJobIdList) == 0:
            raise StopIteration
        self.debugOut('Started checking: %s' % set(lzip(*wmsJobIdList)[0]))
        self.debugPool()

        wmsIdList = list(self._getRawIDs(wmsJobIdList))
        wmsIdArgument = ' '.join(wmsIdList)
        wmsToJobMap = dict(wmsJobIdList)

        activity = utils.ActivityLog('fetching job status')
        statusProcess = self.Pool.LoggedExecute(
            self.statusExec, '%(format)s %(jobIDs)s' % {
                "jobIDs": wmsIdArgument,
                "format": self.statusReturnFormat
            })
        activity.finish()

        activity = utils.ActivityLog('checking job status')
        # process all lines of the status executable output
        utils.vprint('querrying condor_q', 2)
        for statusReturnLine in statusProcess.iter():
            try:
                # test if wmsID job was requested, then extract data and remove from check list
                if statusReturnLine.split()[0] in wmsIdList:
                    (jobID, wmsID, status,
                     jobinfo) = self._statusReturnLineRead(statusReturnLine)
                    wmsIdList.remove(wmsID)
                    yield (jobID, self._createId(wmsID), status, jobinfo)
            except Exception:
                raise BackendError('Error reading job status info:\n%s' %
                                   statusReturnLine)

        # cleanup after final yield
        retCode = statusProcess.wait()
        if retCode != 0:
            if self.explainError(statusProcess, retCode):
                pass
            else:
                statusProcess.logError(self.errorLog, brief=True)
        activity.finish()

        self.debugOut("Remaining after condor_q: %s" % wmsIdList)
        # jobs not in queue have either succeeded or failed - both are considered 'Done' for GC
        # if no additional information is required, consider everything we couldn't find as done
        if retCode == 0:
            for wmsID in list(wmsIdList):
                wmsIdList.remove(wmsID)
                wmsID = self._createId(wmsID)
                yield (wmsToJobMap[wmsID], wmsID, Job.DONE, {})
        # TODO: query log on properly configured pool
        # querying the history can be SLOW! only do when necessary and possible
        if False and len(wmsIdList) > 0 and self.remoteType != PoolType.SPOOL:
            utils.vprint('querrying condor_history', 2)
            # querying the history can be VERY slow! Only do so bit by bit if possible
            if self.historyFile:
                historyList = sorted([
                    "-f " + file for file in ifilter(
                        os.path.isfile, glob.glob(self.historyFile + "*"))
                ])
            else:
                historyList = [""]
            # query the history file by file until no more jobs need updating
            for historyFile in historyList:
                if len(wmsIdList) > 0:
                    statusArgs = '%(fileQuery)s %(format)s %(jobIDs)s' % {
                        "fileQuery": historyFile,
                        "jobIDs": " ",
                        "format": self.statusReturnFormat
                    }
                    statusProcess = self.Pool.LoggedExecute(
                        self.historyExec, statusArgs)
                    for statusReturnLine in statusProcess.iter():
                        # test if line starts with a number and was requested
                        try:
                            # test if wmsID job was requested, then extract data and remove from check list
                            if statusReturnLine.split()[0] in wmsIdList:
                                (jobID, wmsID, status,
                                 jobinfo) = self._statusReturnLineRead(
                                     statusReturnLine)
                                wmsIdList.remove(wmsID)
                                yield (jobID, self._createId(wmsID), status,
                                       jobinfo)
                        except Exception:
                            raise BackendError(
                                'Error reading job status info:\n%s' %
                                statusReturnLine)

                    # cleanup after final yield
                    retCode = statusProcess.wait()
                    if retCode != 0:
                        if self.explainError(statusProcess, retCode):
                            pass
                        else:
                            statusProcess.logError(self.errorLog, brief=True)
        self.debugFlush()
Example #45
    def submitJobs(self, jobNumListFull, module):
        submitBatch = 25
        for index in irange(0, len(jobNumListFull), submitBatch):
            jobNumList = jobNumListFull[index:index + submitBatch]
            self.debugOut("\nStarted submitting: %s" % jobNumList)
            self.debugPool()

            # get the full job config path and basename
            def _getJobCFG(jobNum):
                return os.path.join(self.getSandboxPath(jobNum), 'job_%d.var' %
                                    jobNum), 'job_%d.var' % jobNum

            activity = utils.ActivityLog('preparing jobs')
            # construct a temporary JDL for this batch of jobs
            jdlDescriptor, jdlFilePath = tempfile.mkstemp(suffix='.jdl')
            jdlSubmitPath = jdlFilePath
            self.debugOut("Writing temporary jdl to: " + jdlSubmitPath)
            try:
                data = self.makeJDLdata(jobNumList, module)
                utils.safeWrite(os.fdopen(jdlDescriptor, 'w'), data)
            except Exception:
                utils.removeFiles([jdlFilePath])
                raise BackendError('Could not write jdl data to %s.' %
                                   jdlFilePath)

            # create the _jobconfig.sh file containing the actual data
            for jobNum in jobNumList:
                try:
                    self._writeJobConfig(
                        _getJobCFG(jobNum)[0], jobNum, module, {})
                except Exception:
                    raise BackendError(
                        'Could not write _jobconfig data for %s.' % jobNum)

            self.debugOut("Copying to remote")
            # copy infiles to ssh/gsissh remote pool if required
            if self.remoteType == PoolType.SSH or self.remoteType == PoolType.GSISSH:
                activity = utils.ActivityLog('preparing remote scheduler')
                self.debugOut("Copying to sandbox")
                workdirBase = self.getWorkdirPath()
                # TODO: check whether shared remote files already exist and copy otherwise
                for _, fileSource, fileTarget in self._getSandboxFilesIn(
                        module):
                    copyProcess = self.Pool.LoggedCopyToRemote(
                        fileSource, os.path.join(workdirBase, fileTarget))
                    if copyProcess.wait() != 0:
                        if self.explainError(copyProcess, copyProcess.wait()):
                            pass
                        else:
                            copyProcess.logError(self.errorLog, brief=True)
                    self.debugFlush()
                # copy job config files
                self.debugOut("Copying job configs")
                for jobNum in jobNumList:
                    fileSource, fileTarget = _getJobCFG(jobNum)
                    copyProcess = self.Pool.LoggedCopyToRemote(
                        fileSource,
                        os.path.join(self.getWorkdirPath(jobNum), fileTarget))
                    if copyProcess.wait() != 0:
                        if self.explainError(copyProcess, copyProcess.wait()):
                            pass
                        else:
                            copyProcess.logError(self.errorLog, brief=True)
                    self.debugFlush()
                # copy jdl
                self.debugOut("Copying jdl")
                jdlSubmitPath = os.path.join(workdirBase,
                                             os.path.basename(jdlFilePath))
                copyProcess = self.Pool.LoggedCopyToRemote(
                    jdlFilePath, jdlSubmitPath)
                if copyProcess.wait() != 0:
                    if self.explainError(copyProcess, copyProcess.wait()):
                        pass
                    else:
                        copyProcess.logError(self.errorLog, brief=True)
                self.debugFlush()
                # copy proxy
                for authFile in self._token.getAuthFiles():
                    self.debugOut("Copying proxy")
                    copyProcess = self.Pool.LoggedCopyToRemote(
                        authFile,
                        os.path.join(self.getWorkdirPath(),
                                     os.path.basename(authFile)))
                    if copyProcess.wait() != 0:
                        if self.explainError(copyProcess, copyProcess.wait()):
                            pass
                        else:
                            copyProcess.logError(self.errorLog, brief=True)
                    self.debugFlush()

            self.debugOut("Starting jobs")
            try:
                # submit all jobs simultaneously and temporarily store verbose (ClassAd) output
                activity = utils.ActivityLog('queuing jobs at scheduler')
                proc = self.Pool.LoggedExecute(
                    self.submitExec,
                    ' -verbose %(JDL)s' % {"JDL": jdlSubmitPath})

                self.debugOut("AAAAA")
                # extract the Condor ID (WMS ID) of the jobs from output ClassAds
                wmsJobIdList = []
                for line in proc.iter():
                    if "GridControl_GCIDtoWMSID" in line:
                        GCWMSID = line.split('=')[1].strip(' "\n').split('@')
                        GCID, WMSID = int(GCWMSID[0]), GCWMSID[1].strip()
                        # Condor creates a default job then overwrites settings on any subsequent job - i.e. skip every second, but better be sure
                        if (not wmsJobIdList) or (GCID not in lzip(
                                *wmsJobIdList)[0]):
                            wmsJobIdList.append((self._createId(WMSID), GCID))
                    if "GridControl_GCtoWMSID" in line:
                        self.debugOut("o : %s" % line)
                        self.debugOut("o : %s" % wmsJobIdList)

                retCode = proc.wait()
                activity.finish()
                if (retCode != 0) or (len(wmsJobIdList) < len(jobNumList)):
                    if self.explainError(proc, retCode):
                        pass
                    else:
                        utils.eprint("Submitted %4d jobs of %4d expected" %
                                     (len(wmsJobIdList), len(jobNumList)))
                        proc.logError(self.errorLog, jdl=jdlFilePath)
            finally:
                utils.removeFiles([jdlFilePath])
            self.debugOut("Done Submitting")

            # yield the (jobNum, WMS ID, other data) of each job successively
            for index in irange(len(wmsJobIdList)):
                yield (wmsJobIdList[index][1], wmsJobIdList[index][0], {})
            self.debugOut("Yielded submitted job")
            self.debugFlush()
Example #46
 def show_report(self, job_db, jobnum_list):
     self._show_table(lzip(Job.enum_value_list, Job.enum_name_list),
                      [self._get_job_state_dict(job_db, jobnum_list)],
                      pivot=True)
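Here lzip pairs two parallel lists, the numeric job states with their display names, which _show_table presumably consumes as (key, label) header tuples. A small sketch with hypothetical state values:

def lzip(*args):
	return list(zip(*args))

enum_value_list = [0, 1, 2]                     # hypothetical Job state values
enum_name_list = ['INIT', 'QUEUED', 'RUNNING']  # hypothetical Job state names
print(lzip(enum_value_list, enum_name_list))
# [(0, 'INIT'), (1, 'QUEUED'), (2, 'RUNNING')]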
Example #47
 def parse_tuples(self, pconfig, varexpr, output_vn, value):
     # eg. '11 12 13 14' -> [(11, 12), (12, 13), (13, 14)] -> [12, 13, 14]
     tuple_token_list = value.split()
     return lzip(tuple_token_list, tuple_token_list[1:])
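The binning pattern lzip(tuple_token_list, tuple_token_list[1:]) pairs each token with its successor, turning a list of bin edges into (lower, upper) intervals; a minimal sketch, assuming lzip is list(zip(...)):

def lzip(*args):
	return list(zip(*args))

tuple_token_list = '11 12 13 14'.split()
print(lzip(tuple_token_list, tuple_token_list[1:]))
# [('11', '12'), ('12', '13'), ('13', '14')]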
Example #48
	def show_report(self, job_db, jobnum_list):
		self._show_table(lzip(Job.enum_value_list, Job.enum_name_list),
			[self._get_job_state_dict(job_db, jobnum_list)], pivot=True)
Example #49
def replaceDict(result, allVars, varMapping = None):
	for (virtual, real) in (varMapping or lzip(allVars.keys(), allVars.keys())):
		for delim in ['@', '__']:
			result = result.replace(delim + virtual + delim, str(allVars.get(real, '')))
	return result
Example #50
	def parse_tuples(self, pconfig, varexpr, output_vn, value):
		# eg. '11 12 13 14' -> [(11, 12), (12, 13), (13, 14)] -> [12, 13, 14]
		tuple_token_list = value.split()
		return lzip(tuple_token_list, tuple_token_list[1:])
Example #51
def replace_with_dict(value, mapping_values, mapping_keys=None):
	mapping_keys = mapping_keys or lzip(mapping_values.keys(), mapping_values.keys())
	for (virtual, real) in mapping_keys:
		for delim in ['@', '__']:
			value = value.replace(delim + virtual + delim, str(mapping_values.get(real, '')))
	return value