Example #1
def split_brackets(tokens, brackets = None, exType = Exception):
	if brackets is None:
		brackets = ['()', '{}', '[]']
	buffer = ''
	stack_bracket = []
	map_close_to_open = dict(imap(lambda x: (x[1], x[0]), brackets))
	position = 0
	for token in tokens:
		position += len(token) # store position for proper error messages
		if token in map_close_to_open.values():
			stack_bracket.append((token, position))
		if token in map_close_to_open.keys():
			if not stack_bracket:
				raise exType('Closing bracket %r at position %d is without opening bracket' % (token, position))
			elif stack_bracket[-1][0] == map_close_to_open[token]:
				stack_bracket.pop()
				if not stack_bracket:
					buffer += token
					yield buffer
					buffer = ''
					continue
			else:
				raise exType('Closing bracket %r at position %d does not match bracket %r at position %d' % (token, position, stack_bracket[-1][0], stack_bracket[-1][1]))
		if stack_bracket:
			buffer += token
		else:
			yield token
	if stack_bracket:
		raise exType('Unclosed brackets %s' % str.join(', ', imap(lambda b_pos: '%r at position %d' % b_pos, stack_bracket)))
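A usage sketch (not part of the original source): with imap aliased to the Python 3 built-in map, the generator yields plain tokens and groups bracketed runs into single strings.

imap = map  # stand-in for the project's python_compat helper (assumption)
tokens = ['a', '(', 'b', 'c', ')', 'd']
print(list(split_brackets(tokens)))  # -> ['a', '(bc)', 'd']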
Example #2
	def _resyncInternal(self): # This function is _VERY_ time critical!
		tmp = self._rawSource.resync() # First ask about psource changes
		(redoNewPNum, disableNewPNum, sizeChange) = (set(tmp[0]), set(tmp[1]), tmp[2])
		hashNew = self._rawSource.getHash()
		hashChange = self._storedHash != hashNew
		self._storedHash = hashNew
		if not (redoNewPNum or disableNewPNum or sizeChange or hashChange):
			self._resyncState = None
			return

		psource_old = ParameterAdapter(None, ParameterSource.createInstance('GCDumpParameterSource', self._pathParams))
		psource_new = ParameterAdapter(None, self._rawSource)

		mapJob2PID = {}
		(pAdded, pMissing, _) = self._diffParams(psource_old, psource_new, mapJob2PID, redoNewPNum, disableNewPNum)
		self._source = self._getResyncSource(psource_old, psource_new, mapJob2PID, pAdded, pMissing, disableNewPNum)

		self._mapJob2PID = mapJob2PID # Update Job2PID map
		redoNewPNum = redoNewPNum.difference(disableNewPNum)
		if redoNewPNum or disableNewPNum:
			mapPID2Job = dict(ismap(utils.swap, self._mapJob2PID.items()))
			translate = lambda pNum: mapPID2Job.get(pNum, pNum)
			self._resyncState = (set(imap(translate, redoNewPNum)), set(imap(translate, disableNewPNum)), sizeChange)
		elif sizeChange:
			self._resyncState = (set(), set(), sizeChange)
		# Write resynced state
		self._writeJob2PID(self._pathJob2PID + '.tmp')
		ParameterSource.getClass('GCDumpParameterSource').write(self._pathParams + '.tmp', self)
		os.rename(self._pathJob2PID + '.tmp', self._pathJob2PID)
		os.rename(self._pathParams + '.tmp', self._pathParams)
Example #3
def collapse_psp_list(psp_list, tracked_list, opts):
	psp_dict = {}
	psp_dict_nicks = {}
	header_list = [('COLLATE_JOBS', '# of jobs')]
	if 'DATASETSPLIT' in tracked_list:
		tracked_list.remove('DATASETSPLIT')
		if opts.collapse == 1:
			tracked_list.append('DATASETNICK')
			header_list.append(('DATASETNICK', 'DATASETNICK'))
		elif opts.collapse == 2:
			header_list.append(('COLLATE_NICK', '# of nicks'))
	for pset in psp_list:
		if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
			pset.pop('DATASETSPLIT')
		nickname = None
		if ('DATASETNICK' in pset) and (opts.collapse == 2):
			nickname = pset.pop('DATASETNICK')
		hash_str = md5_hex(repr(lmap(lambda key: pset.get(str(key)), tracked_list)))
		psp_dict.setdefault(hash_str, []).append(pset)
		psp_dict_nicks.setdefault(hash_str, set()).add(nickname)

	def _do_collate(hash_str):
		psp = psp_dict[hash_str][0]
		psp['COLLATE_JOBS'] = len(psp_dict[hash_str])
		psp['COLLATE_NICK'] = len(psp_dict_nicks[hash_str])
		return psp
	psp_list = sorted(imap(_do_collate, psp_dict), key=lambda x: tuple(imap(str, x.values())))
	return (header_list, psp_list)
Example #4
	def get_job_dict(self, jobnum):
		# Get job dependent environment variables
		job_env_dict = SCRAMTask.get_job_dict(self, jobnum)
		if not self._has_dataset:
			job_env_dict['MAX_EVENTS'] = self._events_per_job
		job_env_dict.update(dict(self._cmssw_search_dict))
		if self._do_gzip_std_output:
			job_env_dict['GZIP_OUT'] = 'yes'
		if self._project_area_tarball_on_se:
			job_env_dict['SE_RUNTIME'] = 'yes'
		if self._project_area:
			job_env_dict['HAS_RUNTIME'] = 'yes'
		job_env_dict['CMSSW_EXEC'] = 'cmsRun'
		job_env_dict['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self._config_fn_list))
		job_env_dict['CMSSW_OLD_RELEASETOP'] = self._old_release_top
		if self.prolog.is_active():
			job_env_dict['CMSSW_PROLOG_EXEC'] = self.prolog.get_command()
			job_env_dict['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ',
				imap(lambda x: x.path_rel, self.prolog.get_sb_in_fpi_list()))
			job_env_dict['CMSSW_PROLOG_ARGS'] = self.prolog.get_arguments()
		if self.epilog.is_active():
			job_env_dict['CMSSW_EPILOG_EXEC'] = self.epilog.get_command()
			job_env_dict['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ',
				imap(lambda x: x.path_rel, self.epilog.get_sb_in_fpi_list()))
			job_env_dict['CMSSW_EPILOG_ARGS'] = self.epilog.get_arguments()
		return job_env_dict
Example #5
	def _resync_adapter(self, pa_old, pa_new, result_redo, result_disable, size_change):
		(map_jobnum2pnum, pspi_list_added, pspi_list_missing) = _diff_pspi_list(pa_old, pa_new,
			result_redo, result_disable)
		# Reorder and reconstruct parameter space with the following layout:
		# NNNNNNNNNNNNN OOOOOOOOO | source: NEW (==self) and OLD (==from file)
		# <same><added> <missing> | same: both in NEW and OLD, added: only in NEW, missing: only in OLD
		if pspi_list_added:
			_extend_map_jobnum2pnum(map_jobnum2pnum, pa_old.get_job_len(), pspi_list_added)
		if pspi_list_missing:
			# extend the parameter source by placeholders for the missing parameter space points
			psrc_missing = _create_placeholder_psrc(pa_old, pa_new,
				map_jobnum2pnum, pspi_list_missing, result_disable)
			self._psrc = ParameterSource.create_instance('ChainParameterSource',
				self._psrc_raw, psrc_missing)

		self._map_jobnum2pnum = map_jobnum2pnum  # Update Job2PID map
		# Write resynced state
		self._write_jobnum2pnum(self._path_jobnum2pnum + '.tmp')
		ParameterSource.get_class('GCDumpParameterSource').write(self._path_params + '.tmp',
			self.get_job_len(), self.get_job_metadata(), self.iter_jobs())
		os.rename(self._path_jobnum2pnum + '.tmp', self._path_jobnum2pnum)
		os.rename(self._path_params + '.tmp', self._path_params)

		result_redo = result_redo.difference(result_disable)
		if result_redo or result_disable:
			map_pnum2jobnum = reverse_dict(self._map_jobnum2pnum)

			def _translate_pnum(pnum):
				return map_pnum2jobnum.get(pnum, pnum)
			result_redo = set(imap(_translate_pnum, result_redo))
			result_disable = set(imap(_translate_pnum, result_disable))
			return (result_redo, result_disable, size_change)
		return (set(), set(), size_change)
Example #6
	def __init__(self, jobDB, task, jobs = None, configString = ''):
		Report.__init__(self, jobDB, task, jobs, configString)
		catJobs = {}
		catDescDict = {}
		# Assignment of jobs to categories (depending on variables and using datasetnick if available)
		jobConfig = {}
		for jobNum in self._jobs:
			if task:
				jobConfig = task.getJobConfig(jobNum)
			varList = sorted(ifilter(lambda var: '!' not in repr(var), jobConfig.keys()))
			if 'DATASETSPLIT' in varList:
				varList.remove('DATASETSPLIT')
				varList.append('DATASETNICK')
			catKey = str.join('|', imap(lambda var: '%s=%s' % (var, jobConfig[var]), varList))
			catJobs.setdefault(catKey, []).append(jobNum)
			if catKey not in catDescDict:
				catDescDict[catKey] = dict(imap(lambda var: (var, jobConfig[var]), varList))
		# Kill redundant keys from description
		commonVars = dict(imap(lambda var: (var, jobConfig[var]), varList)) # seed with last varList
		for catKey in catDescDict:
			for key in list(commonVars.keys()):
				if key not in catDescDict[catKey].keys():
					commonVars.pop(key)
				elif commonVars[key] != catDescDict[catKey][key]:
					commonVars.pop(key)
		for catKey in catDescDict:
			for commonKey in commonVars:
				catDescDict[catKey].pop(commonKey)
		# Generate job-category map with efficient int keys - catNum becomes the new catKey
		self._job2cat = {}
		self._catDescDict = {}
		for catNum, catKey in enumerate(sorted(catJobs)):
			self._catDescDict[catNum] = catDescDict[catKey]
			self._job2cat.update(dict.fromkeys(catJobs[catKey], catNum))
Example #7
	def __init__(self, config, job_db, task):
		map_cat2jobs = {}
		map_cat2desc = {}
		job_config_dict = {}
		vn_list = []
		for jobnum in job_db.get_job_list():
			if task:
				job_config_dict = task.get_job_dict(jobnum)
			vn_list = lfilter(self._is_not_ignored_vn, sorted(job_config_dict.keys()))
			cat_key = str.join('|', imap(lambda vn: '%s=%s' % (vn, job_config_dict[vn]), vn_list))
			map_cat2jobs.setdefault(cat_key, []).append(jobnum)
			if cat_key not in map_cat2desc:
				map_cat2desc[cat_key] = dict(imap(lambda var: (var, job_config_dict[var]), vn_list))
		# Kill redundant keys from description - seed with last vn_list
		common_var_dict = dict(imap(lambda var: (var, job_config_dict[var]), vn_list))
		for cat_key in map_cat2desc:
			for key in list(common_var_dict.keys()):
				if key not in map_cat2desc[cat_key].keys():
					common_var_dict.pop(key)
				elif common_var_dict[key] != map_cat2desc[cat_key][key]:
					common_var_dict.pop(key)
		for cat_key in map_cat2desc:
			for common_key in common_var_dict:
				map_cat2desc[cat_key].pop(common_key)
		# Generate job-category map with efficient int keys - cat_num becomes the new cat_key
		self._job2cat = {}
		self._map_cat2desc = {}
		for cat_num, cat_key in enumerate(sorted(map_cat2jobs)):
			self._map_cat2desc[cat_num] = map_cat2desc[cat_key]
			self._job2cat.update(dict.fromkeys(map_cat2jobs[cat_key], cat_num))
Example #8
	def _get_section_key(self, section):
		tmp = section.split()
		if not tmp:
			raise ConfigError('Invalid config section %r' % section)
		(cur_section, cur_name_list, cur_tag_map) = (tmp[0], [], {})
		for token in tmp[1:]:
			if ':' in token:
				tag_entry = token.split(':')
				if len(tag_entry) != 2:
					raise ConfigError('Invalid config tag in section %r' % section)
				cur_tag_map[tag_entry[0]] = tag_entry[1]
			elif token:
				cur_name_list.append(token)

		class_section_idx = safe_index(self._class_section_list, cur_section)
		section_idx = safe_index(self._section_list, cur_section)
		if (not self._class_section_list) and (not self._section_list):
			section_idx = 0
		if (class_section_idx is not None) or (section_idx is not None):
			# Section is selected by class or manually
			name_idx_tuple = tuple(imap(lambda n: safe_index(self._section_name_list, n), cur_name_list))
			if None not in name_idx_tuple:  # All names in current section are selected
				cur_tag_name_list = lfilter(cur_tag_map.__contains__, self._section_tag_order)
				left_tag_name_list = lfilter(lambda tn: tn not in self._section_tag_order, cur_tag_map)
				tag_tuple_list = imap(lambda tn: (tn, cur_tag_map[tn]), cur_tag_name_list)
				tag_idx_tuple = tuple(imap(lambda tt: safe_index(self._section_tag_list, tt), tag_tuple_list))
				if (None not in tag_idx_tuple) and not left_tag_name_list:
					return (class_section_idx, section_idx, name_idx_tuple, tag_idx_tuple)
Example #9
	def _get_sandbox_file_list(self, task, sm_list):
		# Prepare all input files
		dep_list = set(ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
		dep_fn_list = lmap(lambda dep: resolve_path('env.%s.sh' % dep,
			lmap(lambda pkg: get_path_share('', pkg=pkg), os.listdir(get_path_pkg()))), dep_list)
		task_config_dict = dict_union(self._remote_event_handler.get_mon_env_dict(),
			*imap(lambda x: x.get_task_dict(), [task] + sm_list))
		task_config_dict.update({'GC_DEPFILES': str.join(' ', dep_list),
			'GC_USERNAME': self._token.get_user_name(), 'GC_WMS_NAME': self._name})
		task_config_str_list = DictFormat(escape_strings=True).format(
			task_config_dict, format='export %s%s%s\n')
		vn_alias_dict = dict(izip(self._remote_event_handler.get_mon_env_dict().keys(),
			self._remote_event_handler.get_mon_env_dict().keys()))
		vn_alias_dict.update(task.get_var_alias_map())
		vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

		# Resolve wildcards in task input files
		def _get_task_fn_list():
			for fpi in task.get_sb_in_fpi_list():
				matched = glob.glob(fpi.path_abs)
				if matched != []:
					for match in matched:
						yield match
				else:
					yield fpi.path_abs
		return lchain([self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(), [
			VirtualFile('_config.sh', sorted(task_config_str_list)),
			VirtualFile('_varmap.dat', sorted(vn_alias_str_list))]])
Example #10
	def _parseTime(self, time_str):
		result = 0
		entry_map = {'yea': 365 * 24 * 60 * 60, 'day': 24 * 60 * 60, 'hou': 60 * 60, 'min': 60, 'sec': 1}
		tmp = time_str.split()
		for (entry, value) in izip(imap(lambda x: x[:3], tmp[1::2]), imap(int, tmp[::2])):
			result += entry_map[entry] * value
		return result
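For reference, a standalone Python 3 sketch of the same duration parsing (hypothetical name; izip/imap from the project's python_compat are replaced by built-ins, and the 'value unit value unit ...' token layout is taken from the snippet above):

def parse_time(time_str):
	entry_map = {'yea': 365 * 24 * 60 * 60, 'day': 24 * 60 * 60, 'hou': 60 * 60, 'min': 60, 'sec': 1}
	tokens = time_str.split()
	# tokens alternate between a value and its unit, e.g. '2 days 3 hours'
	return sum(entry_map[unit[:3]] * int(value) for value, unit in zip(tokens[::2], tokens[1::2]))

# parse_time('2 days 3 hours 30 min') == 2 * 86400 + 3 * 3600 + 30 * 60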
Example #11
	def __init__(self, head, data, delimeter='|'):
		ConsoleTable.__init__(self)
		head = list(head)
		self._delimeter = delimeter
		self._write_line(str.join(self._delimeter, imap(lambda x: x[1], head)))
		for entry in data:
			if isinstance(entry, dict):
				self._write_line(str.join(self._delimeter, imap(lambda x: str(entry.get(x[0], '')), head)))
Example #12
	def process(self, pNum, splitInfo, result):
		if not self._lumi_filter.empty():
			lumi_filter = self._lumi_filter.lookup(splitInfo[DataSplitter.Nickname], is_selector = False)
			if lumi_filter:
				idxRuns = splitInfo[DataSplitter.MetadataHeader].index("Runs")
				iterRuns = ichain(imap(lambda m: m[idxRuns], splitInfo[DataSplitter.Metadata]))
				short_lumi_filter = filterLumiFilter(list(iterRuns), lumi_filter)
				result['LUMI_RANGE'] = str.join(',', imap(lambda lr: '"%s"' % lr, formatLumi(short_lumi_filter)))
Example #13
	def _parse_status(self, value, default):
		if any(imap(lambda x: x in value, ['E', 'e'])):
			return Job.UNKNOWN
		if any(imap(lambda x: x in value, ['h', 's', 'S', 'T', 'w'])):
			return Job.QUEUED
		if any(imap(lambda x: x in value, ['r', 't'])):
			return Job.RUNNING
		return Job.READY
Example #14
	def _parse_status(self, value, default):
		if any(imap(value.__contains__, ['E', 'e'])):
			return Job.UNKNOWN
		if any(imap(value.__contains__, ['h', 's', 'S', 'T', 'w'])):
			return Job.QUEUED
		if any(imap(value.__contains__, ['r', 't'])):
			return Job.RUNNING
		return Job.READY
Example #15
	def process(self, pnum, partition, result):
		if self.enabled():
			lumi_filter = self._lumi_filter.lookup(partition[DataSplitter.Nickname], is_selector=False)
			if lumi_filter:
				idx_runs = partition[DataSplitter.MetadataHeader].index('Runs')
				iter_run = ichain(imap(lambda m: m[idx_runs], partition[DataSplitter.Metadata]))
				short_lumi_filter = filter_lumi_filter(list(iter_run), lumi_filter)
				iter_lumi_range_str = imap(lambda lr: '"%s"' % lr, format_lumi(short_lumi_filter))
				result['LUMI_RANGE'] = str.join(',', iter_lumi_range_str)
Example #16
	def _readJob2PID(self):
		fp = ZipFile(self._pathJob2PID, 'r')
		try:
			self.maxN = int(fp.readline())
			if not self.maxN:
				self.maxN = None
			mapInfo = ifilter(identity, imap(str.strip, fp.readline().split(',')))
			self._mapJob2PID = dict(imap(lambda x: tuple(imap(lambda y: int(y.lstrip('!')), x.split(':'))), mapInfo))
			self._activeMap = {}
		finally:
			fp.close()
Example #17
	def getEntries(self, path, metadata, events, seList, objStore):
		datacachePath = os.path.join(objStore.get('GC_WORKDIR', ''), 'datacache.dat')
		source = utils.QM((self._source == '') and os.path.exists(datacachePath), datacachePath, self._source)
		if source and (source not in self._lfnMap):
			pSource = DataProvider.createInstance('ListProvider', createConfig(), source)
			for (n, fl) in imap(lambda b: (b[DataProvider.Dataset], b[DataProvider.FileList]), pSource.getBlocks()):
				self._lfnMap.setdefault(source, {}).update(dict(imap(lambda fi: (self.lfnTrans(fi[DataProvider.URL]), n), fl)))
		pList = set()
		for key in ifilter(lambda k: k in metadata, self._parentKeys):
			pList.update(imap(lambda pPath: self._lfnMap.get(source, {}).get(self.lfnTrans(pPath)), metadata[key]))
		metadata['PARENT_PATH'] = lfilter(identity, pList)
		yield (path, metadata, events, seList, objStore)
Example #18
	def _read_jobnum2pnum(self):
		fp = GZipTextFile(self._path_jobnum2pnum, 'r')
		try:
			def _translate_info(jobnum_pnum_info):
				return tuple(imap(lambda x: int(x.lstrip('!')), jobnum_pnum_info.split(':', 1)))

			int(fp.readline())  # max number of jobs
			jobnum_pnum_info_iter = iidfilter(imap(str.strip, fp.readline().split(',')))
			self._map_jobnum2pnum = dict(imap(_translate_info, jobnum_pnum_info_iter))
			self._can_submit_map = {}
		finally:
			fp.close()
Example #19
def download_job(opts, work_dn, status_mon, job_db, job_obj, jobnum):
	if job_obj.get('download') == 'True' and not opts.mark_ignore_dl:
		return status_mon.register_job_result(jobnum, 'All files already downloaded',
			JobDownloadStatus.JOB_ALREADY)

	# Read the file hash entries from job info file
	fi_list = FileInfoProcessor().process(os.path.join(work_dn, 'output', 'job_%d' % jobnum)) or []
	is_download_failed = False
	if not fi_list:
		if opts.mark_empty_fail:
			is_download_failed = True
		else:
			return status_mon.register_job_result(jobnum, 'Job has no output files',
				JobDownloadStatus.JOB_NO_OUTPUT)

	download_result_list = []
	progress = ProgressActivity('Processing output files', len(fi_list))
	for (fi_idx, fi) in enumerate(fi_list):
		progress.update_progress(fi_idx, msg='Processing output file %r' % fi[FileInfo.NameDest])
		download_result_list.append(download_single_file(opts, jobnum, fi_idx, fi, status_mon))
	progress.finish()

	is_download_failed = is_download_failed or any(imap(download_result_list.__contains__, [
		FileDownloadStatus.FILE_TIMEOUT, FileDownloadStatus.FILE_HASH_FAILED,
		FileDownloadStatus.FILE_TRANSFER_FAILED, FileDownloadStatus.FILE_MKDIR_FAILED]))
	is_download_success = all(imap([FileDownloadStatus.FILE_OK,
		FileDownloadStatus.FILE_EXISTS].__contains__, download_result_list))

	# Ignore the first opts.retry number of failed jobs
	retry_count = int(job_obj.get('download attempt', 0))
	if fi_list and is_download_failed and opts.retry and (retry_count < int(opts.retry)):
		set_job_prop(job_db, jobnum, job_obj, 'download attempt', str(retry_count + 1))
		return status_mon.register_job_result(jobnum, 'Download attempt #%d failed' % (retry_count + 1),
			JobDownloadStatus.RETRY)

	delete_files(opts, jobnum, fi_list, is_download_failed)

	if is_download_failed:
		if opts.mark_fail:
			# Mark job as failed to trigger resubmission
			job_obj.state = Job.FAILED
			job_db.commit(jobnum, job_obj)
		status_mon.register_job_result(jobnum, 'Download failed', JobDownloadStatus.JOB_FAILED)
	elif is_download_success:
		if opts.mark_dl:
			# Mark as downloaded
			set_job_prop(job_db, jobnum, job_obj, 'download', 'True')
		status_mon.register_job_result(jobnum, 'Download successful', JobDownloadStatus.JOB_OK)
	else:
		# e.g. because of SE blacklist
		status_mon.register_job_result(jobnum, 'Download incomplete', JobDownloadStatus.JOB_INCOMPLETE)
Example #20
			def _getPartition(self, key):
				if not self._cacheKey == key / self._keySize:
					self._cacheKey = key / self._keySize
					subTarFileObj = self._tar.extractfile('%03dXX.tgz' % (key / self._keySize))
					subTarFileObj = BytesBuffer(gzip.GzipFile(fileobj = subTarFileObj).read()) # 3-4x speedup for sequential access
					self._cacheTar = tarfile.open(mode = 'r', fileobj = subTarFileObj)
				fullData = lmap(bytes2str, self._cacheTar.extractfile('%05d' % key).readlines())
				data = self._fmt.parse(lfilter(lambda x: not x.startswith('='), fullData),
					keyParser = {None: int}, valueParser = self._parserMap)
				fileList = imap(lambda x: x[1:], ifilter(lambda x: x.startswith('='), fullData))
				if DataSplitter.CommonPrefix in data:
					fileList = imap(lambda x: '%s/%s' % (data[DataSplitter.CommonPrefix], x), fileList)
				data[DataSplitter.FileList] = lmap(str.strip, fileList)
				return data
Example #21
def process_intervention(opts, psource):
	log.info('')
	tmp = psource.getJobIntervention()
	if tmp:
		if opts.displaymode == 'parseable':
			log.info('R: %s', str.join(',', imap(str, tmp[0])))
			log.info('D: %s', str.join(',', imap(str, tmp[1])))
		else:
			log.info('   Redo: %s', repr(tmp[0]))
			log.info('Disable: %s', repr(tmp[1]))
	else:
		if opts.displaymode == 'parseable':
			log.info('NOINT')
		else:
			log.info('No intervention')
Example #22
def process_intervention(opts, psource):
	utils.vprint('')
	tmp = psource.getJobIntervention()
	if tmp:
		if opts.displaymode == 'parseable':
			utils.vprint('R: %s' % str.join(',', imap(str, tmp[0])))
			utils.vprint('D: %s' % str.join(',', imap(str, tmp[1])))
		else:
			utils.vprint('   Redo: %r' % tmp[0])
			utils.vprint('Disable: %r' % tmp[1])
	else:
		if opts.displaymode == 'parseable':
			utils.vprint('NOINT')
		else:
			utils.vprint('No intervention')
Example #23
	def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
		jobnum = metadata_dict['GC_JOBNUM']
		cms_log_fn = os.path.join(item, 'cmssw.dbs.tar.gz')
		if os.path.exists(cms_log_fn):
			tar = tarfile.open(cms_log_fn, 'r')
			# Collect infos about transferred files
			file_summary_map = {}
			try:
				file_info_str_list = tar.extractfile('files').readlines()
				for rawdata in imap(lambda value: bytes2str(value).split(), file_info_str_list):
					file_summary_map[rawdata[2]] = {
						'SE_OUTPUT_HASH_CRC32': rawdata[0],
						'SE_OUTPUT_SIZE': int(rawdata[1])
					}
				obj_dict['CMSSW_FILES'] = file_summary_map
			except Exception:
				raise DatasetError('Could not read CMSSW file infos for job %d!' % jobnum)
			# Collect infos about CMSSW processing steps
			config_summary_map = {}
			self._process_steps(jobnum, tar, config_summary_map, file_summary_map)
			for cfg in config_summary_map:
				job_hash_list = metadata_dict.setdefault('CMSSW_CONFIG_JOBHASH', [])
				job_hash_list.append(config_summary_map[cfg]['CMSSW_CONFIG_HASH'])
			obj_dict.update({'CMSSW_CONFIG': config_summary_map, 'CMSSW_FILES': file_summary_map})
			tar.close()
		yield (item, metadata_dict, entries, location_list, obj_dict)
Example #24
	def __init__(self, arg, **kwargs):
		idList = imap(lambda x: x.split('-'), arg.split(','))
		try:
			parse = lambda x: utils.QM(x != '', int, str)
			self.ranges = lmap(lambda x: (parse(x[0])(x[0]), parse(x[-1])(x[-1])), idList)
		except Exception:
			raise UserError('Job identifiers must be integers or ranges.')
Example #25
	def process(self, wait = utils.wait):
		wmsTiming = self.wms.getTimings()
		t_start = time.time()
		while True:
			didWait = False
			# Check whether wms can submit
			if not self.wms.canSubmit(self._submitTime, self._submitFlag):
				self._submitFlag = False
			# Check free disk space
			spaceLogger = logging.getLogger('workflow.space')
			spaceLogger.addFilter(LogEveryNsec(interval = 5 * 60))
			if (self._checkSpace > 0) and utils.freeSpace(self._workDir) < self._checkSpace:
				spaceLogger.warning('Not enough space left in working directory')
			else:
				for action in imap(str.lower, self._actionList):
					if action.startswith('c') and not utils.abort():   # check for jobs
						if self.jobManager.check(self.wms):
							didWait = wait(wmsTiming.waitBetweenSteps)
					elif action.startswith('r') and not utils.abort(): # retrieve finished jobs
						if self.jobManager.retrieve(self.wms):
							didWait = wait(wmsTiming.waitBetweenSteps)
					elif action.startswith('s') and not utils.abort() and self._submitFlag:
						if self.jobManager.submit(self.wms):
							didWait = wait(wmsTiming.waitBetweenSteps)

			# quit if abort flag is set or not in continuous mode
			if utils.abort() or ((self.duration >= 0) and (time.time() - t_start > self.duration)):
				break
			# idle timeout
			if not didWait:
				wait(wmsTiming.waitOnIdle)
		self.monitor.onFinish()
Example #26
	def __call__(self, jobNum, jobObj):
		def checkID(jobRange):
			if (jobRange[0] == '') or (jobNum >= jobRange[0]):
				if (jobRange[1] == '') or (jobNum <= jobRange[1]):
					return True
			return False
		return reduce(operator.or_, imap(checkID, self.ranges))
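A standalone Python 3 sketch of the same range check (hypothetical names; reduce comes from functools, and '' acts as an open bound on either side):

from functools import reduce
import operator

def matches(job_num, ranges):
	def check_id(job_range):
		(lo, hi) = job_range
		return (lo == '' or job_num >= lo) and (hi == '' or job_num <= hi)
	return reduce(operator.or_, map(check_id, ranges))

# matches(3, [(2, 5), (8, '')]) is True, matches(7, [(2, 5), (8, '')]) is False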
Example #27
	def _build_blocks(self, map_key2fm_list, map_key2name, map_key2metadata_dict):
		# Return named dataset
		for key in sorted(map_key2fm_list):
			result = {
				DataProvider.Dataset: map_key2name[key[:1]],
				DataProvider.BlockName: map_key2name[key[:2]],
			}
			fm_list = map_key2fm_list[key]

			# Determine location_list
			location_list = None
			for file_location_list in ifilter(lambda s: s is not None, imap(itemgetter(3), fm_list)):
				location_list = location_list or []
				location_list.extend(file_location_list)
			if location_list is not None:
				result[DataProvider.Locations] = list(UniqueList(location_list))

			# use first file [0] to get the initial metadata_dict [1]
			metadata_name_list = list(fm_list[0][1].keys())
			result[DataProvider.Metadata] = metadata_name_list

			# translate file metadata into data provider file info entries
			def _translate_fm2fi(url, metadata_dict, entries, location_list, obj_dict):
				if entries is None:
					entries = -1
				return {DataProvider.URL: url, DataProvider.NEntries: entries,
					DataProvider.Metadata: lmap(metadata_dict.get, metadata_name_list)}
			result[DataProvider.FileList] = lsmap(_translate_fm2fi, fm_list)
			yield result
Example #28
	def _getSectionKey(self, section):
		tmp = section.split()
		assert(len(tmp) > 0)
		(curSection, curNames, curTags) = (tmp[0], [], {})
		for token in tmp[1:]:
			if ':' in token:
				tag_entry = token.split(':')
				assert(len(tag_entry) == 2)
				curTags[tag_entry[0]] = tag_entry[1]
			elif token:
				curNames.append(token)

		def myIndex(src, value):
			try:
				return src.index(value)
			except Exception:
				return None
		idxClass = myIndex(self._cfgClassSections, curSection)
		idxSection = myIndex(self._cfgSections, curSection)
		if (not self._cfgClassSections) and (not self._cfgSections):
			idxSection = 0
		if (idxClass is not None) or (idxSection is not None): # Section is selected by class or manually
			idxNames = tuple(imap(lambda n: myIndex(self._cfgNames, n), curNames))
			if None not in idxNames: # All names in current section are selected
				curTagNames = lfilter(lambda tn: tn in curTags, self._cfgTagsOrder)
				curTagNamesLeft = lfilter(lambda tn: tn not in self._cfgTagsOrder, curTags)
				idxTags = lmap(lambda tn: myIndex(self._cfgTags, (tn, curTags[tn])), curTagNames)
				if (None not in idxTags) and not curTagNamesLeft:
					return (idxClass, idxSection, idxNames, idxTags)
Example #29
def partition_check(splitter):
		fail = utils.set()
		for jobNum in irange(splitter.getMaxJobs()):
			splitInfo = splitter.getSplitInfo(jobNum)
			try:
				(events, skip, files) = (0, 0, [])
				for line in open(os.path.join(opts.checkSplitting, 'jobs', 'job_%d.var' % jobNum)).readlines():
					if 'MAX_EVENTS' in line:
						events = int(line.split('MAX_EVENTS', 1)[1].replace('=', ''))
					if 'SKIP_EVENTS' in line:
						skip = int(line.split('SKIP_EVENTS', 1)[1].replace('=', ''))
					if 'FILE_NAMES' in line:
						files = line.split('FILE_NAMES', 1)[1].replace('=', '').replace('\"', '').replace('\\', '')
						files = lmap(lambda x: x.strip().strip(','), files.split())
				def printError(curJ, curS, msg):
					if curJ != curS:
						logging.warning('%s in job %d (j:%s != s:%s)', msg, jobNum, curJ, curS)
						fail.add(jobNum)
				printError(events, splitInfo[DataSplitter.NEntries], 'Inconsistent number of events')
				printError(skip, splitInfo[DataSplitter.Skipped], 'Inconsistent number of skipped events')
				printError(files, splitInfo[DataSplitter.FileList], 'Inconsistent list of files')
			except Exception:
				logging.warning('Job %d was never initialized!', jobNum)
		if fail:
			logging.warning('Failed: ' + str.join('\n', imap(str, fail)))
Example #30
	def processBlock(self, block):
		if self._lumi_filter.empty() and ((self._lumi_keep == LumiKeep.RunLumi) or (DataProvider.Metadata not in block)):
			return block
		def getMetadataIdx(key):
			if key in block.get(DataProvider.Metadata, []):
				return block[DataProvider.Metadata].index(key)
		idxRuns = getMetadataIdx('Runs')
		idxLumi = getMetadataIdx('Lumi')
		if not self._lumi_filter.empty():
			lumi_filter = self._lumi_filter.lookup(block[DataProvider.Nickname], is_selector = False)
			if lumi_filter and ((idxRuns is None) or (idxLumi is None)) and self._lumi_strict:
				fqName = block[DataProvider.Dataset]
				if block[DataProvider.BlockName] != '0':
					fqName += '#' + block[DataProvider.BlockName]
				raise DatasetError('Strict lumi filter active but dataset %s does not provide lumi information!' % fqName)

		block[DataProvider.FileList] = list(self._processFI(block, idxRuns, idxLumi))
		if not block[DataProvider.FileList]:
			return
		block[DataProvider.NEntries] = sum(imap(lambda fi: fi[DataProvider.NEntries], block[DataProvider.FileList]))
		if self._lumi_keep == LumiKeep.RunLumi:
			return block
		elif self._lumi_keep == LumiKeep.Run:
			if idxLumi is not None:
				block[DataProvider.Metadata].pop(idxLumi)
			return block
		removeRunLumi(block[DataProvider.Metadata], idxRuns, idxLumi)
		return block
Example #31
    def retrieveJobs(self, gcID_jobNum_List):  # Process output sandboxes returned by getJobsOutput
        # Function to force moving a directory
        def forceMove(source, target):
            try:
                if os.path.exists(target):
                    shutil.rmtree(target)
            except IOError:
                self._log.exception('%r cannot be removed', target)
                return False
            try:
                shutil.move(source, target)
            except IOError:
                self._log.exception(
                    'Error moving job output directory from %r to %r', source,
                    target)
                return False
            return True

        retrievedJobs = []

        for inJobNum, pathName in self._getJobsOutput(gcID_jobNum_List):
            # inJobNum != None, pathName == None => Job could not be retrieved
            if pathName is None:
                if inJobNum not in retrievedJobs:
                    yield (inJobNum, -1, {}, None)
                continue

            # inJobNum == None, pathName != None => Found leftovers of job retrieval
            if inJobNum is None:
                continue

            # inJobNum != None, pathName != None => Job retrieval from WMS was ok
            jobFile = os.path.join(pathName, 'job.info')
            try:
                job_info = self._job_parser.process(pathName)
            except Exception:
                self._log.exception('Unable to parse job.info')
                job_info = None
            if job_info:
                jobNum = job_info[JobResult.JOBNUM]
                if jobNum != inJobNum:
                    raise BackendError('Invalid job id in job file %s' %
                                       jobFile)
                if forceMove(pathName,
                             os.path.join(self._outputPath,
                                          'job_%d' % jobNum)):
                    retrievedJobs.append(inJobNum)
                    yield (jobNum, job_info[JobResult.EXITCODE],
                           job_info[JobResult.RAW], pathName)
                else:
                    yield (jobNum, -1, {}, None)
                continue

            # Clean empty pathNames
            for subDir in imap(lambda x: x[0], os.walk(pathName, topdown=False)):
                try:
                    os.rmdir(subDir)
                except Exception:
                    clear_current_exception()

            if os.path.exists(pathName):
                # Preserve failed job
                utils.ensureDirExists(self._failPath,
                                      'failed output directory')
                forceMove(
                    pathName,
                    os.path.join(self._failPath, os.path.basename(pathName)))

            yield (inJobNum, -1, {}, None)
Example #32
def list_dataset_names(ds_name_list):
    header_list = [(DataProvider.Dataset, 'Dataset')]
    ConsoleTable.create(
        header_list,
        imap(lambda name: {DataProvider.Dataset: name}, ds_name_list))
Example #33
 def _cleanup_dict(mapping):  # strip all key value entries and filter empty parameters
     tmp = tuple(imap(lambda item: lmap(str.strip, item), mapping.items()))
     return dict(lfilter(lambda k_v: k_v[0] != '', tmp))
Example #34
 def __call__(self, jobNum, jobObj):
     return reduce(
         operator.and_,
         imap(lambda selector: selector(jobNum, jobObj), self._selectors))
Example #35
def wrapList(value, length, delimLines=',\n', delimEntries=', '):
    counter = lambda item, buffer: len(item) + sum(imap(len, buffer)) + 2 * len(buffer) > length
    wrapped = accumulate(value, [], counter, opAdd=lambda x, y: x + [y])
    return str.join(delimLines,
                    imap(lambda x: str.join(delimEntries, x), wrapped))
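A standalone Python 3 sketch of the same wrapping idea (hypothetical name; the project's accumulate/imap helpers are replaced by an explicit loop, so the exact emit behaviour may differ slightly):

def wrap_list(value, length, delim_lines=',\n', delim_entries=', '):
    lines, current = [], []
    for item in value:
        # start a new output line once adding the item would exceed the target length
        if current and len(item) + sum(map(len, current)) + 2 * len(current) > length:
            lines.append(current)
            current = []
        current.append(item)
    if current:
        lines.append(current)
    return delim_lines.join(delim_entries.join(line) for line in lines)

# wrap_list(['aaa', 'bbb', 'ccc', 'ddd'], 10) -> 'aaa, bbb,\nccc, ddd'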
Example #36
def download_job(opts, work_dn, status_mon, job_db, job_obj, jobnum):
    if job_obj.get('download') == 'True' and not opts.mark_ignore_dl:
        return status_mon.register_job_result(jobnum,
                                              'All files already downloaded',
                                              JobDownloadStatus.JOB_ALREADY)

    # Read the file hash entries from job info file
    fi_list = FileInfoProcessor().process(
        os.path.join(work_dn, 'output', 'job_%d' % jobnum)) or []
    is_download_failed = False
    if not fi_list:
        if opts.mark_empty_fail:
            is_download_failed = True
        else:
            return status_mon.register_job_result(
                jobnum, 'Job has no output files',
                JobDownloadStatus.JOB_NO_OUTPUT)

    download_result_list = []
    progress = ProgressActivity('Processing output files', len(fi_list))
    for (fi_idx, fi) in enumerate(fi_list):
        progress.update_progress(fi_idx, msg='Processing output file %r' % fi[FileInfo.NameDest])
        download_result_list.append(
            download_single_file(opts, jobnum, fi_idx, fi, status_mon))
    progress.finish()

    is_download_failed = is_download_failed or any(
        imap(download_result_list.__contains__, [
            FileDownloadStatus.FILE_TIMEOUT,
            FileDownloadStatus.FILE_HASH_FAILED,
            FileDownloadStatus.FILE_TRANSFER_FAILED,
            FileDownloadStatus.FILE_MKDIR_FAILED
        ]))
    is_download_success = all(
        imap([FileDownloadStatus.FILE_OK,
              FileDownloadStatus.FILE_EXISTS].__contains__,
             download_result_list))

    # Ignore the first opts.retry number of failed jobs
    retry_count = int(job_obj.get('download attempt', 0))
    if fi_list and is_download_failed and opts.retry and (retry_count < int(opts.retry)):
        set_job_prop(job_db, jobnum, job_obj, 'download attempt',
                     str(retry_count + 1))
        return status_mon.register_job_result(
            jobnum, 'Download attempt #%d failed' % (retry_count + 1),
            JobDownloadStatus.RETRY)

    delete_files(opts, jobnum, fi_list, is_download_failed)

    if is_download_failed:
        if opts.mark_fail:
            # Mark job as failed to trigger resubmission
            job_obj.state = Job.FAILED
            job_db.commit(jobnum, job_obj)
        status_mon.register_job_result(jobnum, 'Download failed',
                                       JobDownloadStatus.JOB_FAILED)
    elif is_download_success:
        if opts.mark_dl:
            # Mark as downloaded
            set_job_prop(job_db, jobnum, job_obj, 'download', 'True')
        status_mon.register_job_result(jobnum, 'Download successful',
                                       JobDownloadStatus.JOB_OK)
    else:
        # e.g. because of SE blacklist
        status_mon.register_job_result(jobnum, 'Download incomplete',
                                       JobDownloadStatus.JOB_INCOMPLETE)
Example #37
 def fillParameterKeys(self, result):
     result.extend(imap(ParameterMetadata, self._keys))
Example #38
 def _center_of_mass(data):
     wsum_x = sum(imap(lambda pt: pt['x'] * pt['weight'], data))
     wsum_y = sum(imap(lambda pt: pt['y'] * pt['weight'], data))
     sum_w = sum(imap(lambda pt: pt['weight'], data))
     return {'x': wsum_x / sum_w, 'y': wsum_y / sum_w}
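A standalone Python 3 sketch of the weighted centroid above (hypothetical name; plain generator expressions instead of imap, same point-dict layout):

def center_of_mass(data):
    sum_w = sum(pt['weight'] for pt in data)
    return {'x': sum(pt['x'] * pt['weight'] for pt in data) / sum_w,
            'y': sum(pt['y'] * pt['weight'] for pt in data) / sum_w}

# center_of_mass([{'x': 0, 'y': 0, 'weight': 1}, {'x': 4, 'y': 2, 'weight': 3}])
# -> {'x': 3.0, 'y': 1.5}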
Example #39
 def _get_map_gc_id_jobnum(self, jobnum_list):
     return dict(
         imap(lambda jobnum: (self.job_db.get_job(jobnum).gc_id, jobnum),
              jobnum_list))
Example #40
 def getScript(self):
     return lchain(imap(lambda h: h.getScript(), self._handlers))
Example #41
	def getNeededKeys(self, splitter):
		return lchain(imap(lambda p: p.getNeededKeys(splitter), self._processorList))
Example #42
	def getKeys(self):
		return lchain(imap(lambda p: p.getKeys(), self._processorList))
Example #43
 def formatRange(rlrange):
     (start, end) = rlrange
     default = lambda x, d: (x, d)[x is None]
     start = [default(start[0], '1'), default(start[1], 'MIN')]
     end = [default(end[0], '9999999'), default(end[1], 'MAX')]
     return str.join('-', imap(lambda x: '%s:%s' % tuple(x), (start, end)))
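A standalone Python 3 sketch of the run:lumi range formatting (hypothetical name; None components fall back to 1/MIN and 9999999/MAX as in the snippet above):

def format_range(rlrange):
    (start, end) = rlrange
    default = lambda x, d: d if x is None else x
    start = (default(start[0], '1'), default(start[1], 'MIN'))
    end = (default(end[0], '9999999'), default(end[1], 'MAX'))
    return '-'.join('%s:%s' % part for part in (start, end))

# format_range(((1, None), (5, None))) == '1:MIN-5:MAX'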
Example #44
 def getList(self, option, default=noDefault, parseItem=identity, **kwargs):
     obj2str = lambda value: '\n' + str.join('\n', imap(str, value))
     str2obj = lambda value: lmap(parseItem, parseList(value, None))
     return self._getInternal('list', obj2str, str2obj, None, option,
                              default, **kwargs)
Example #45
 def newBlock(self, old, filelist):
     new = dict(old)
     new[DataProvider.FileList] = filelist
     new[DataProvider.NEntries] = sum(
         imap(lambda x: x[DataProvider.NEntries], filelist))
     return new
Example #46
 def _sum(job_class):
     return sum(imap(js_dict.get, job_class.state_list))
Example #47
def accepted_se(opts, fi):
    return any(imap(fi[FileInfo.Path].__contains__,
                    opts.select_se)) or not opts.select_se
Example #48
def get_local_username():
    for username in iidfilter(
            imap(os.environ.get, ['LOGNAME', 'USER', 'LNAME', 'USERNAME'])):
        return username
    return ''
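A plain Python 3 sketch of the same lookup (iidfilter appears to drop falsy entries and is approximated here with filter(None, ...)):

import os

def get_local_username():
    for username in filter(None, map(os.environ.get, ['LOGNAME', 'USER', 'LNAME', 'USERNAME'])):
        return username
    return ''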
Example #49
 def __repr__(self):
     return 'key(%s)' % str.join(
         ', ', imap(lambda x: "'%s'" % x, self._lookup_keys))
Example #50
 def cleanupDict(d):
     # strip all key value entries
     tmp = tuple(imap(lambda item: lmap(str.strip, item), d.items()))  # lmap keeps each (key, value) pair subscriptable
     # filter empty parameters
     return lfilter(lambda k_v: k_v[0] != '', tmp)
Example #51
def formatDict(d, fmt='%s=%r', joinStr=', '):
    return str.join(joinStr, imap(lambda k: fmt % (k, d[k]), sorted(d)))
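A plain Python 3 equivalent of the sorted key=repr(value) formatting (hypothetical name):

def format_dict(d, fmt='%s=%r', join_str=', '):
    return join_str.join(fmt % (key, d[key]) for key in sorted(d))

# format_dict({'b': 2, 'a': 'x'}) == "a='x', b=2"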
Example #52
 def _formatFileList(self, fl):
     if self._prefix:
         fl = imap(lambda fn: self._prefix + fn.split('/store/', 1)[-1], fl)
     return str.join(', ', imap(lambda x: '"%s"' % x, fl))
Example #53
 def _accept_run(self, block, fi, idx_runs, lumi_filter):
     if idx_runs is None:
         return True
     return any(
         imap(lambda run: select_run(run, lumi_filter),
              fi[DataProvider.Metadata][idx_runs]))
Example #54
def parse_list(value, delimeter, filter_fun=lambda x: x not in ['', '\n']):
    if value:
        return lfilter(filter_fun, imap(str.strip, value.split(delimeter)))
    return []
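A plain Python 3 sketch of the same list parsing (hypothetical name; lfilter/imap are the project's list-returning filter and iterator map helpers):

def parse_list_py3(value, delimeter, filter_fun=lambda x: x not in ['', '\n']):
    if value:
        return [item for item in map(str.strip, value.split(delimeter)) if filter_fun(item)]
    return []

# parse_list_py3(' a , b ,, c ', ',') == ['a', 'b', 'c']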
Example #55
 def _counter(item, buffer):
     return len(item) + sum(imap(len, buffer)) + 2 * len(buffer) > length
Example #56
 def fill_parameter_metadata(self, result):
     result.extend(imap(ParameterMetadata, self._output_vn_list))
Example #57
 def storageReq(self, sites):
     fmt = lambda x: '(target.GlueSEUniqueID == %s)' % jdlEscape(x)
     if sites:
         return 'anyMatch(other.storage.CloseSEs, ' + str.join(
             ' || ', imap(fmt, sites)) + ')'
Example #58
 def __repr__(self):
     return '%s(quiet = %r, code = %r, var = %r, file = %r, tree = %r, thread = %r)' % (
         self.__class__.__name__, tuple(imap(logging.getLevelName, self._force_details_range)),
         self._ex_context, self._ex_vars, self._ex_fstack, self._ex_tree, self._ex_threads)
Example #59
 def get_dependency_list(self):
     if True in imap(lambda x: not x.startswith('dir'),
                     self._storage_paths):
         return ['glite']
     return []
Example #60
 def getTaskConfig(self):
     return {
         'GC_MONITORING':
         str.join(' ', imap(os.path.basename, self.getScript()))
     }