Code example #1
File: provider_scan.py Project: Fra-nk/grid-control
	def _getBlocksInternal(self):
		# Split files into blocks/datasets via key functions and determine metadata intersection
		(protoBlocks, commonDS, commonB) = ({}, {}, {})
		def getActiveKeys(kUser, kGuard, gIdx):
			return kUser + (kGuard or lchain(imap(lambda x: x.getGuards()[gIdx], self._scanner)))
		keysDS = getActiveKeys(self._ds_keys_user, self._ds_keys_guard, 0)
		keysB = getActiveKeys(self._b_keys_user, self._b_keys_guard, 1)
		for fileInfo in ifilter(itemgetter(0), self._collectFiles()):
			hashDS = self._generateKey(keysDS, md5_hex(repr(self._datasetExpr)) + md5_hex(repr(self._datasetNick)), *fileInfo)
			hashB = self._generateKey(keysB, hashDS + md5_hex(repr(fileInfo[3])), *fileInfo) # [3] == SE list
			if not self._ds_select or (hashDS in self._ds_select):
				if not self._b_select or (hashB in self._b_select):
					fileInfo[1].update({'DS_KEY': hashDS, 'BLOCK_KEY': hashB})
					protoBlocks.setdefault(hashDS, {}).setdefault(hashB, []).append(fileInfo)
					utils.intersectDict(commonDS.setdefault(hashDS, dict(fileInfo[1])), fileInfo[1])
					utils.intersectDict(commonB.setdefault(hashDS, {}).setdefault(hashB, dict(fileInfo[1])), fileInfo[1])

		# Generate names for blocks/datasets using common metadata
		(hashNameDictDS, hashNameDictB) = ({}, {})
		for hashDS in protoBlocks:
			hashNameDictDS[hashDS] = self._generateDatasetName(hashDS, commonDS[hashDS])
			for hashB in protoBlocks[hashDS]:
				hashNameDictB[hashB] = (hashDS, self._generateBlockName(hashB, commonB[hashDS][hashB]))

		self._findCollision('dataset', hashNameDictDS, commonDS, keysDS, lambda name, key: [key])
		self._findCollision('block', hashNameDictB, commonB, keysDS + keysB, lambda name, key: [name[0], key], lambda name: name[1])

		for block in self._buildBlocks(protoBlocks, hashNameDictDS, hashNameDictB):
			yield block
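Every example on this page feeds strings into md5_hex and concatenates the resulting digests into dataset and block keys. The helper itself is not shown here; a minimal stand-in, assuming it simply wraps hashlib.md5 and returns the hex digest, could look like this:

import hashlib

def md5_hex(value):
    # Hypothetical stand-in for grid-control's md5_hex helper: MD5 the given
    # string and return the 32-character hexadecimal digest.
    return hashlib.md5(value.encode('utf-8')).hexdigest()

# The dataset/block seeds above are built by concatenating such digests:
seed = md5_hex(repr('/some/dataset/expr')) + md5_hex(repr('some_nick'))
print(len(seed))  # 64 characters: two concatenated 32-character digests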
Code example #2
def makeEnum(members = None, cls = None, useHash = True):
	members = members or []
	if cls:
		enumID = md5_hex(str(members) + '!' + cls.__name__)[:4]
	else:
		enumID = md5_hex(str(members))[:4]
		cls = type('Enum_%s_%s' % (enumID, str.join('_', members)), (), {})

	def getValue(idx, name):
		if useHash:
			return idx + int(enumID, 16)
		else:
			return idx
	values = lsmap(getValue, enumerate(members))

	cls.enumNames = members
	cls.enumValues = values
	enumMapNV = dict(izip(imap(str.lower, cls.enumNames), cls.enumValues))
	enumMapVN = dict(izip(cls.enumValues, cls.enumNames))
	if len(enumMapNV) != len(enumMapVN):
		raise APIError('Invalid enum definition!')
	def str2enum(cls, value, *args):
		return enumMapNV.get(value.lower(), *args)
	cls.enum2str = enumMapVN.get
	cls.str2enum = classmethod(str2enum)
	for name, value in izip(cls.enumNames, cls.enumValues):
		setattr(cls, name, value)
	return cls
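The value offset derived from enumID is what keeps the values of different enums disjoint when useHash is enabled. A condensed, self-contained sketch of that idea (using a local md5_hex stand-in; names below are illustrative, not part of the project API):

import hashlib

def md5_hex(value):  # assumed stand-in, see the sketch above
    return hashlib.md5(value.encode('utf-8')).hexdigest()

def make_simple_enum(members, use_hash=True):
    # Condensed version of makeEnum: derive a 4-hex-digit id from the member
    # names and offset every value by it, so different enums rarely share values.
    enum_id = md5_hex(str(members))[:4]
    offset = int(enum_id, 16) if use_hash else 0
    cls = type('Enum_%s' % enum_id, (), {})
    cls.enumNames = list(members)
    cls.enumValues = [offset + idx for idx in range(len(members))]
    cls.enum2str = dict(zip(cls.enumValues, cls.enumNames)).get
    cls.str2enum = dict(zip([m.lower() for m in members], cls.enumValues)).get
    for name, value in zip(cls.enumNames, cls.enumValues):
        setattr(cls, name, value)
    return cls

JobState = make_simple_enum(['QUEUED', 'RUNNING', 'DONE'])
print(JobState.RUNNING, JobState.enum2str(JobState.RUNNING))  # <offset+1> RUNNING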
Code example #3
def make_enum(enum_name_list=None, cls=None, use_hash=True, register=True):
	enum_name_list = enum_name_list or []
	if cls:
		enum_id = md5_hex(str(enum_name_list) + '!' + cls.__name__)[:4]
	else:
		enum_id = md5_hex(str(enum_name_list))[:4]
		cls = type('Enum_%s_%s' % (enum_id, str.join('_', enum_name_list)), (), {})

	def _intstr2enum(cls, value, default=unspecified):
		enum = ignore_exception(Exception, default, int, value)
		if enum not in cls.enum_value_list:
			allowed_str = str.join(', ', imap(lambda nv: '%s=%s' % nv, _map_name2value.items()))
			raise Exception('Invalid enum value %s (allowed are %r)' % (repr(value), allowed_str))
		return enum

	def _register_enum(cls, name):
		value = len(cls.enum_name_list)
		if use_hash:
			value += int(enum_id, 16)
		for enum_cls in make_enum.enum_list:
			if use_hash and (value in enum_cls.enum_value_list) and (enum_cls.enum_id != enum_id):
				raise APIError('enum value collision detected!')
		cls.enum_name_list.append(name)
		cls.enum_value_list.append(value)
		setattr(cls, name, value)
		_map_name2value[name.lower()] = value
		_map_value2name[value] = name
		if len(_map_name2value) != len(_map_value2name):
			raise APIError('Invalid enum definition! (%s:%s)' % (_map_name2value, _map_value2name))

	def _str2enum(cls, value, *args):
		lookup_fun = _map_name2value.__getitem__
		if args:
			lookup_fun = _map_name2value.get
		try:
			return lookup_fun(value.lower(), *args)
		except Exception:
			allowed_str = str.join(', ', cls.enum_name_list)
			raise Exception('Invalid enum string %s (allowed are %r)' % (repr(value), allowed_str))

	_map_value2name = {}
	_map_name2value = {}
	cls.enum_id = enum_id
	cls.enum_name_list = []
	cls.enum_value_list = []
	cls.enum2str = _map_value2name.get
	cls.str2enum = classmethod(_str2enum)
	cls.intstr2enum = classmethod(_intstr2enum)
	cls.register_enum = classmethod(_register_enum)

	for enum_name in enum_name_list:
		cls.register_enum(enum_name)
	if register:
		make_enum.enum_list.append(cls)
	return cls
Code example #4
    def _getBlocksInternal(self):
        # Split files into blocks/datasets via key functions and determine metadata intersection
        (protoBlocks, commonDS, commonB) = ({}, {}, {})

        def getActiveKeys(kUser, kGuard, gIdx):
            return kUser + (kGuard or lchain(
                imap(lambda x: x.getGuards()[gIdx], self._scanner)))

        keysDS = getActiveKeys(self._ds_keys_user, self._ds_keys_guard, 0)
        keysB = getActiveKeys(self._b_keys_user, self._b_keys_guard, 1)
        for fileInfo in ifilter(itemgetter(0), self._collectFiles()):
            hashDS = self._generateKey(
                keysDS,
                md5_hex(repr(self._datasetExpr)) +
                md5_hex(repr(self._datasetNick)), *fileInfo)
            hashB = self._generateKey(keysB,
                                      hashDS + md5_hex(repr(fileInfo[3])),
                                      *fileInfo)  # [3] == SE list
            if not self._ds_select or (hashDS in self._ds_select):
                if not self._b_select or (hashB in self._b_select):
                    fileInfo[1].update({'DS_KEY': hashDS, 'BLOCK_KEY': hashB})
                    protoBlocks.setdefault(hashDS,
                                           {}).setdefault(hashB,
                                                          []).append(fileInfo)
                    utils.intersectDict(
                        commonDS.setdefault(hashDS, dict(fileInfo[1])),
                        fileInfo[1])
                    utils.intersectDict(
                        commonB.setdefault(hashDS, {}).setdefault(
                            hashB, dict(fileInfo[1])), fileInfo[1])

        # Generate names for blocks/datasets using common metadata
        (hashNameDictDS, hashNameDictB) = ({}, {})
        for hashDS in protoBlocks:
            hashNameDictDS[hashDS] = self._generateDatasetName(
                hashDS, commonDS[hashDS])
            for hashB in protoBlocks[hashDS]:
                hashNameDictB[hashB] = (hashDS,
                                        self._generateBlockName(
                                            hashB, commonB[hashDS][hashB]))

        self._findCollision('dataset', hashNameDictDS, commonDS, keysDS,
                            lambda name, key: [key])
        self._findCollision('block', hashNameDictB, commonB, keysDS + keysB,
                            lambda name, key: [name[0], key],
                            lambda name: name[1])

        for block in self._buildBlocks(protoBlocks, hashNameDictDS,
                                       hashNameDictB):
            yield block
Code example #5
File: condor_wms.py Project: mschnepf/grid-control
	def __init__(self, config, name):
		self._sandbox_helper = SandboxHelper(config)
		self._error_log_fn = config.get_work_path('error.tar')
		cancel_executor = CancelAndPurgeJobs(config, CondorCancelJobs(config),
				LocalPurgeJobs(config, self._sandbox_helper))
		BasicWMS.__init__(self, config, name,
			check_executor=CheckJobsMissingState(config, CondorCheckJobs(config)),
			cancel_executor=cancel_executor)
		self._task_id = config.get('task id', md5_hex(str(time.time())), persistent=True)  # FIXME
		# finalize config state by reading values or setting to defaults
		# load keys for condor pool ClassAds
		self._jdl_writer = CondorJDLWriter(config)
		self._universe = config.get('universe', 'vanilla', on_change=None)
		self._pool_req_dict = config.get_dict('poolArgs req', {})[0]
		self._pool_work_dn = None
		self._proc_factory = None
		(self._submit_exec, self._transfer_exec) = (None, None)
		# prepare interfaces for local/remote/ssh pool access
		self._remote_type = config.get_enum('remote Type', PoolType, PoolType.LOCAL)
		self._init_pool_interface(config)
		# Sandbox base path where individual job data is stored, staged and returned to
		self._sandbox_dn = config.get_path('sandbox path',
			config.get_work_path('sandbox'), must_exist=False)
		# broker for selecting sites - FIXME: this looks wrong... pool != site
		self._pool_host_list = config.get_list(['poolhostlist', 'pool host list'], [])
		self._broker_site = config.get_plugin('site broker', 'UserBroker', cls=Broker,
			bind_kwargs={'tags': [self]}, pargs=('sites', 'sites', lambda: self._pool_host_list))
		self._wall_time_mode = config.get_enum('wall time mode', WallTimeMode, WallTimeMode.ignore,
			subset=[WallTimeMode.hard, WallTimeMode.ignore])
Code example #6
	def __init__(self, config, name):
		self._sandbox_helper = SandboxHelper(config)
		self._error_log_fn = config.get_work_path('error.tar')
		cancel_executor = CancelAndPurgeJobs(config, CondorCancelJobs(config),
				LocalPurgeJobs(config, self._sandbox_helper))
		BasicWMS.__init__(self, config, name,
			check_executor=CheckJobsMissingState(config, CondorCheckJobs(config)),
			cancel_executor=cancel_executor)
		self._task_id = config.get('task id', md5_hex(str(time.time())), persistent=True)  # FIXME
		# finalize config state by reading values or setting to defaults
		# load keys for condor pool ClassAds
		self._jdl_writer = CondorJDLWriter(config)
		self._universe = config.get('universe', 'vanilla', on_change=None)
		self._pool_req_dict = config.get_dict('poolArgs req', {})[0]
		self._pool_work_dn = None
		self._proc_factory = None
		(self._submit_exec, self._transfer_exec) = (None, None)
		# prepare interfaces for local/remote/ssh pool access
		self._remote_type = config.get_enum('remote Type', PoolType, PoolType.LOCAL)
		self._init_pool_interface(config)
		# Sandbox base path where individual job data is stored, staged and returned to
		self._sandbox_dn = config.get_path('sandbox path',
			config.get_work_path('sandbox'), must_exist=False)
		# broker for selecting sites - FIXME: this looks wrong... pool != site
		self._pool_host_list = config.get_list(['poolhostlist', 'pool host list'], [])
		self._broker_site = config.get_plugin('site broker', 'UserBroker', cls=Broker,
			bind_kwargs={'tags': [self]}, pargs=('sites', 'sites', lambda: self._pool_host_list))
		self._wall_time_mode = config.get_enum('wall time mode', WallTimeMode, WallTimeMode.ignore,
			subset=[WallTimeMode.hard, WallTimeMode.ignore])
		self._blacklist_nodes = config.get_list(['blacklist nodes'], [], on_change=None)
		self._user_requirements = config.get('user requirements', '', on_change=None)
Code example #7
 def getHash(self):
     buffer = StringBuffer()
     for _ in DataProvider.saveToStream(
             buffer,
             self._datasetProcessor.process(self.getBlocksNormed())):
         pass
     return md5_hex(buffer.getvalue())
Code example #8
File: wms_grid.py Project: tolange/grid-control
    def _get_jobs_output(self, gc_id_jobnum_list):
        # Get output of jobs and yield output dirs
        if len(gc_id_jobnum_list) == 0:
            raise StopIteration

        root_dn = os.path.join(self._path_output, 'tmp')
        try:
            if len(gc_id_jobnum_list) == 1:
                # For single jobs create single subdir
                tmp_dn = os.path.join(root_dn,
                                      md5_hex(gc_id_jobnum_list[0][0]))
            else:
                tmp_dn = root_dn
            ensure_dir_exists(tmp_dn)
        except Exception:
            raise BackendError(
                'Temporary path "%s" could not be created.' % tmp_dn,
                BackendError)

        map_gc_id2jobnum = dict(gc_id_jobnum_list)
        jobs = self._write_wms_id_list(gc_id_jobnum_list)

        activity = Activity('retrieving %d job outputs' %
                            len(gc_id_jobnum_list))
        proc = LocalProcess(self._output_exec, '--noint', '--logfile',
                            '/dev/stderr', '-i', jobs, '--dir', tmp_dn)

        # yield output dirs
        todo = map_gc_id2jobnum.values()
        current_jobnum = None
        for line in imap(str.strip, proc.stdout.iter(timeout=60)):
            if line.startswith(tmp_dn):
                todo.remove(current_jobnum)
                output_dn = line.strip()
                unpack_wildcard_tar(self._log, output_dn)
                yield (current_jobnum, output_dn)
                current_jobnum = None
            else:
                current_jobnum = map_gc_id2jobnum.get(self._create_gc_id(line),
                                                      current_jobnum)
        exit_code = proc.status(timeout=0, terminate=True)
        activity.finish()

        if exit_code != 0:
            if 'Keyboard interrupt raised by user' in proc.stderr.read(
                    timeout=0):
                remove_files([jobs, root_dn])
                raise StopIteration
            else:
                self._log.log_process(proc,
                                      files={'jobs': SafeFile(jobs).read()})
            self._log.error('Trying to recover from error ...')
            for dn in os.listdir(root_dn):
                yield (None, os.path.join(root_dn, dn))

        # return unretrievable jobs
        for jobnum in todo:
            yield (jobnum, None)

        remove_files([jobs, tmp_dn])
Code example #9
	def _assign_dataset_block(self, map_key2fm_list, map_key2metadata_dict, file_metadata_iter):
		# Split files into blocks/datasets via key functions and determine metadata intersection
		for (url, metadata_dict, entries, location_list, obj_dict) in file_metadata_iter:
			# Dataset hash always includes dataset expr and nickname override
			hash_dataset = self._get_hash(self._hash_input_set_dataset, metadata_dict,
				md5_hex(repr(self._dataset_expr)) + md5_hex(repr(self._dataset_nick_override)))
			# Block hash always includes the dataset hash and location list
			hash_block = self._get_hash(self._hash_input_set_block, metadata_dict,
				hash_dataset + md5_hex(repr(location_list)))

			if not self._selected_hash_list_dataset or (hash_dataset in self._selected_hash_list_dataset):
				if not self._selected_hash_list_block or (hash_block in self._selected_hash_list_block):
					metadata_dict.update({'DS_KEY': hash_dataset, 'BLOCK_KEY': hash_block})
					self._assign_dataset_block_selected(map_key2fm_list, map_key2metadata_dict,
						(url, metadata_dict, entries, location_list, obj_dict),
						hash_dataset, hash_block, metadata_dict)
Code example #10
File: dproc_check.py Project: tolange/grid-control
    def process_block(self, block):
        # Check uniqueness of URLs
        url_hash_list = []
        if self._check_url != DatasetUniqueMode.ignore:
            block[DataProvider.FileList] = list(
                self._process_fi_list(url_hash_list,
                                      block[DataProvider.FileList]))
            url_hash_list.sort()

        # Check uniqueness of blocks
        if self._check_block != DatasetUniqueMode.ignore:
            block_hash = md5_hex(
                repr((block.get(DataProvider.Dataset),
                      block[DataProvider.BlockName], url_hash_list,
                      block[DataProvider.NEntries],
                      block[DataProvider.Locations],
                      block.get(DataProvider.Metadata))))
            if block_hash in self._recorded_block:
                msg = 'Multiple occurences of block: "%s"!' % DataProvider.get_block_id(
                    block)
                msg += ' (This check can be configured with %r)' % 'dataset check unique block'
                if self._check_block == DatasetUniqueMode.warn:
                    self._log.warning(msg)
                elif self._check_block == DatasetUniqueMode.abort:
                    raise DatasetError(msg)
                elif self._check_block == DatasetUniqueMode.skip:
                    return None
            self._recorded_block.add(block_hash)
        return block
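The uniqueness checks in this and the related examples all follow the same pattern: hash the identifying tuple of each item and compare it against a set of previously recorded hashes. A small standalone sketch of that pattern (the dict keys used here are illustrative):

import hashlib

def md5_hex(value):  # assumed stand-in
    return hashlib.md5(value.encode('utf-8')).hexdigest()

def iter_unique(file_info_list, recorded_hashes, on_duplicate=None):
    # Yield only entries whose identifying tuple has not been seen before;
    # report duplicates via the optional callback instead of raising.
    for file_info in file_info_list:
        url_hash = md5_hex(repr((file_info['url'], file_info['entries'])))
        if url_hash in recorded_hashes:
            if on_duplicate:
                on_duplicate(file_info['url'])
            continue
        recorded_hashes.add(url_hash)
        yield file_info

recorded = set()
files = [{'url': 'a.root', 'entries': 10}, {'url': 'a.root', 'entries': 10}]
print(len(list(iter_unique(files, recorded, on_duplicate=print))))  # prints a.root, then 1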
Code example #11
def collapse_psp_list(psp_list, tracked_list, opts):
    psp_dict = {}
    psp_dict_nicks = {}
    header_list = [('COLLATE_JOBS', '# of jobs')]
    if 'DATASETSPLIT' in tracked_list:
        tracked_list.remove('DATASETSPLIT')
        if opts.collapse == 1:
            tracked_list.append('DATASETNICK')
            header_list.append(('DATASETNICK', 'DATASETNICK'))
        elif opts.collapse == 2:
            header_list.append(('COLLATE_NICK', '# of nicks'))
    for pset in psp_list:
        if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
            pset.pop('DATASETSPLIT')
        nickname = None
        if ('DATASETNICK' in pset) and (opts.collapse == 2):
            nickname = pset.pop('DATASETNICK')
        hash_str = md5_hex(
            repr(lmap(lambda key: pset.get(str(key)), tracked_list)))
        psp_dict.setdefault(hash_str, []).append(pset)
        psp_dict_nicks.setdefault(hash_str, set()).add(nickname)

    def _do_collate(hash_str):
        psp = psp_dict[hash_str][0]
        psp['COLLATE_JOBS'] = len(psp_dict[hash_str])
        psp['COLLATE_NICK'] = len(psp_dict_nicks[hash_str])
        return psp

    psp_list = sorted(imap(_do_collate, psp_dict),
                      key=lambda x: tuple(imap(str, x.values())))
    return (header_list, psp_list)
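collapse_psp_list groups parameter space points that agree on all tracked keys by hashing the repr of those values; points with identical hashes are collapsed into a single row with a job count. A reduced sketch of the grouping step (keys and data below are made up for illustration):

import hashlib

def md5_hex(value):  # assumed stand-in
    return hashlib.md5(value.encode('utf-8')).hexdigest()

def group_by_tracked(psp_list, tracked_list):
    # Points with equal tracked values map to the same digest and end up
    # in the same bucket, mirroring the psp_dict.setdefault(...) call above.
    groups = {}
    for pset in psp_list:
        hash_str = md5_hex(repr([pset.get(str(key)) for key in tracked_list]))
        groups.setdefault(hash_str, []).append(pset)
    return groups

points = [{'NICK': 'ttbar', 'SEED': 1}, {'NICK': 'ttbar', 'SEED': 2}, {'NICK': 'qcd'}]
print(sorted(len(group) for group in group_by_tracked(points, ['NICK']).values()))  # [1, 2]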
Code example #12
 def __init__(self, config, datasetExpr, datasetNick=None):
     ds_config = config.changeView(viewClass='TaggedConfigView',
                                   addNames=[md5_hex(datasetExpr)])
     if os.path.isdir(datasetExpr):
         scan_pipeline = ['OutputDirsFromWork']
         ds_config.set('source directory', datasetExpr)
         datasetExpr = os.path.join(datasetExpr, 'work.conf')
     else:
         scan_pipeline = ['OutputDirsFromConfig', 'MetadataFromTask']
         datasetExpr, selector = utils.optSplit(datasetExpr, '%')
         ds_config.set('source config', datasetExpr)
         ds_config.set('source job selector', selector)
     ext_config = create_config(datasetExpr)
     ext_task_name = ext_config.changeView(setSections=['global']).get(
         ['module', 'task'])
     if 'ParaMod' in ext_task_name:  # handle old config files
         ext_task_name = ext_config.changeView(
             setSections=['ParaMod']).get('module')
     ext_task_cls = Plugin.getClass(ext_task_name)
     for ext_task_cls in Plugin.getClass(ext_task_name).iterClassBases():
         try:
             scan_holder = GCProviderSetup.getClass('GCProviderSetup_' +
                                                    ext_task_cls.__name__)
         except PluginError:
             continue
         scan_pipeline += scan_holder.scan_pipeline
         break
     ScanProviderBase.__init__(self, ds_config, datasetExpr, datasetNick,
                               scan_pipeline)
Code example #13
def collapse_psp_list(psp_list, tracked_list, opts):
	psp_dict = {}
	psp_dict_nicks = {}
	header_list = [('COLLATE_JOBS', '# of jobs')]
	if 'DATASETSPLIT' in tracked_list:
		tracked_list.remove('DATASETSPLIT')
		if opts.collapse == 1:
			tracked_list.append('DATASETNICK')
			header_list.append(('DATASETNICK', 'DATASETNICK'))
		elif opts.collapse == 2:
			header_list.append(('COLLATE_NICK', '# of nicks'))
	for pset in psp_list:
		if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
			pset.pop('DATASETSPLIT')
		nickname = None
		if ('DATASETNICK' in pset) and (opts.collapse == 2):
			nickname = pset.pop('DATASETNICK')
		hash_str = md5_hex(repr(lmap(lambda key: pset.get(str(key)), tracked_list)))
		psp_dict.setdefault(hash_str, []).append(pset)
		psp_dict_nicks.setdefault(hash_str, set()).add(nickname)

	def _do_collate(hash_str):
		psp = psp_dict[hash_str][0]
		psp['COLLATE_JOBS'] = len(psp_dict[hash_str])
		psp['COLLATE_NICK'] = len(psp_dict_nicks[hash_str])
		return psp
	psp_list = sorted(imap(_do_collate, psp_dict), key=lambda x: tuple(imap(str, x.values())))
	return (header_list, psp_list)
Code example #14
File: provider_base.py Project: tolange/grid-control
 def _get_dataset_hash(self):
     buffer = StringBuffer()
     for _ in DataProvider.save_to_stream(buffer,
                                          self.iter_blocks_normed()):
         pass
     value = buffer.getvalue()
     buffer.close()
     return md5_hex(value)
Code example #15
 def _prepareSubmit(self, task, jobnum_list, queryArguments):
     jdlFilePath = os.path.join(
         self.parentPool.getSandboxPath(), 'htc-%s.schedd-%s.jdl' %
         (self.parentPool.wms_name, md5_hex(self.getURI())))
     safe_write(
         open(jdlFilePath, 'w'),
         lmap(lambda line: line + '\n',
              self._getJDLData(task, jobnum_list, queryArguments)))
     return jdlFilePath
Code example #16
File: provider_scan.py Project: whahmad/grid-control
	def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
		ds_config = config.changeView(viewClass = 'TaggedConfigView', addNames = [md5_hex(datasetExpr)])
		if '*' in os.path.basename(datasetExpr):
			ds_config.set('source directory', os.path.dirname(datasetExpr))
			ds_config.set('filename filter', datasetExpr)
		else:
			ds_config.set('source directory', datasetExpr)
		defScanner = ['FilesFromLS', 'MatchOnFilename', 'MatchDelimeter', 'DetermineEvents', 'AddFilePrefix']
		ScanProviderBase.__init__(self, ds_config, defScanner, datasetNick, datasetID)
Code example #17
File: padapter.py Project: jolange/grid-control
def _translate_pa2pspi_list(padapter):
	# Reduces parameter adapter output to essential information for diff - faster than keying
	meta_iter = ifilter(lambda k: not k.untracked, padapter.get_job_metadata())
	meta_list = sorted(meta_iter, key=lambda k: k.value)

	for psp in padapter.iter_jobs():  # Translates parameter space point into hash
		psp_item_iter = imap(lambda meta: (meta.value, psp.get(meta.value)), meta_list)
		hash_str = md5_hex(repr(lfilter(itemgetter(1), psp_item_iter)))
		yield (psp[ParameterInfo.ACTIVE], hash_str, psp['GC_PARAM'])
Code example #18
File: parameterList.py Project: whahmad/grid-control
def list_parameters(opts, psource):
    (result, needGCParam) = get_parameters(opts, psource)
    enabledOutput = opts.output.split(',')
    output = lfilter(lambda k: not opts.output or k in enabledOutput,
                     psource.getJobKeys())
    stored = lfilter(lambda k: k.untracked == False, output)
    untracked = lfilter(lambda k: k.untracked == True, output)

    if opts.collapse > 0:
        result_old = result
        result = {}
        result_nicks = {}
        head = [('COLLATE_JOBS', '# of jobs')]
        if 'DATASETSPLIT' in stored:
            stored.remove('DATASETSPLIT')
            if opts.collapse == 1:
                stored.append('DATASETNICK')
                head.append(('DATASETNICK', 'DATASETNICK'))
            elif opts.collapse == 2:
                head.append(('COLLATE_NICK', '# of nicks'))
        for pset in result_old:
            if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
                pset.pop('DATASETSPLIT')
            nickname = None
            if ('DATASETNICK' in pset) and (opts.collapse == 2):
                nickname = pset.pop('DATASETNICK')
            h = md5_hex(repr(lmap(lambda key: pset.get(str(key)), stored)))
            result.setdefault(h, []).append(pset)
            result_nicks.setdefault(h, set()).add(nickname)

        def doCollate(h):
            tmp = result[h][0]
            tmp['COLLATE_JOBS'] = len(result[h])
            tmp['COLLATE_NICK'] = len(result_nicks[h])
            return tmp

        result = lmap(doCollate, result)
    else:
        head = [('GC_JOB_ID', '#')]
        if needGCParam:
            head.append(('GC_PARAM', 'GC_PARAM'))
    if opts.active:
        head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
    if opts.visible:
        stored = opts.visible.split(',')
    head.extend(sorted(izip(stored, stored)))
    if opts.untracked:
        head.extend(
            sorted(
                imap(
                    lambda n: (n, '(%s)' % n),
                    ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'],
                            untracked))))
    utils.vprint('')
    utils.printTabular(head, result)
Code example #19
 def _prepareSubmit(self, task, jobnum_list, queryArguments):
     localJdlFilePath = os.path.join(
         self.parentPool.getSandboxPath(), 'htc-%s.schedd-%s.jdl' %
         (self.parentPool.wms_name, md5_hex(self.getURI())))
     readyJobNumList = self._stageSubmitFiles(task, jobnum_list)
     safe_write(
         open(localJdlFilePath, 'w'),
         lmap(lambda line: line + '\n',
              self._getJDLData(task, readyJobNumList, queryArguments)))
     raise NotImplementedError('JDL must get moved to remote')
     return localJdlFilePath
Code example #20
File: wms_grid.py Project: grid-control/grid-control
	def _get_jobs_output(self, gc_id_jobnum_list):
		# Get output of jobs and yield output dirs
		if len(gc_id_jobnum_list) == 0:
			raise StopIteration

		root_dn = os.path.join(self._path_output, 'tmp')
		try:
			if len(gc_id_jobnum_list) == 1:
				# For single jobs create single subdir
				tmp_dn = os.path.join(root_dn, md5_hex(gc_id_jobnum_list[0][0]))
			else:
				tmp_dn = root_dn
			ensure_dir_exists(tmp_dn)
		except Exception:
			raise BackendError('Temporary path "%s" could not be created.' % tmp_dn, BackendError)

		map_gc_id2jobnum = dict(gc_id_jobnum_list)
		jobs = self._write_wms_id_list(gc_id_jobnum_list)

		activity = Activity('retrieving %d job outputs' % len(gc_id_jobnum_list))
		proc = LocalProcess(self._output_exec, '--noint',
			'--logfile', '/dev/stderr', '-i', jobs, '--dir', tmp_dn)

		# yield output dirs
		todo = map_gc_id2jobnum.values()
		current_jobnum = None
		for line in imap(str.strip, proc.stdout.iter(timeout=60)):
			if line.startswith(tmp_dn):
				todo.remove(current_jobnum)
				output_dn = line.strip()
				unpack_wildcard_tar(self._log, output_dn)
				yield (current_jobnum, output_dn)
				current_jobnum = None
			else:
				current_jobnum = map_gc_id2jobnum.get(self._create_gc_id(line), current_jobnum)
		exit_code = proc.status(timeout=0, terminate=True)
		activity.finish()

		if exit_code != 0:
			if 'Keyboard interrupt raised by user' in proc.stderr.read(timeout=0):
				remove_files([jobs, root_dn])
				raise StopIteration
			else:
				self._log.log_process(proc, files={'jobs': SafeFile(jobs).read()})
			self._log.error('Trying to recover from error ...')
			for dn in os.listdir(root_dn):
				yield (None, os.path.join(root_dn, dn))

		# return unretrievable jobs
		for jobnum in todo:
			yield (jobnum, None)

		remove_files([jobs, tmp_dn])
Code example #21
def create_dbs3_json_files(opts, block_info, block_dump):
	block_size = 0
	dataset_type = set()
	for file_info in block_info[DataProvider.FileList]:
		metadata_info = dict(izip(block_info[DataProvider.Metadata], file_info[DataProvider.Metadata]))
		if metadata_info['CMSSW_DATATYPE']:  # this is not always correctly filled
			dataset_type.add(metadata_info['CMSSW_DATATYPE'])
		file_size = metadata_info['SE_OUTPUT_SIZE']
		lfn = file_info[DataProvider.URL]

		# add file information
		block_dump['files'].append({
			'logical_file_name': lfn, 'file_size': file_size,
			'check_sum': metadata_info['SE_OUTPUT_HASH_CRC32'],
			'md5': metadata_info['SE_OUTPUT_HASH_MD5'],
			'adler32': 'NOTSET',
			'file_lumi_list': lmap(lambda run_lumi:
				{'run_num': run_lumi[0], 'lumi_section_num': run_lumi[1]}, metadata_info['CMSSW_LUMIS']),
			'event_count': metadata_info['CMSSW_EVENTS_WRITE'],
			'file_type': 'EDM',
			'auto_cross_section': 0.0,
		})

		# add file parentage information
		if not opts.no_parents:
			block_dump['file_parent_list'].extend(imap(lambda parent_lfn:
				{'logical_file_name': lfn, 'parent_logical_file_name': parent_lfn},
				metadata_info['CMSSW_PARENT_LFN']))

		# fill file / dataset configurations
		dataset_conf_dict = {
			'release_version': metadata_info['CMSSW_VERSION'],
			'pset_hash': metadata_info['CMSSW_CONFIG_HASH'],
			'app_name': 'cmsRun',
			'output_module_label': 'crab2_mod_label',
			'global_tag': metadata_info.get('CMSSW_GLOBALTAG', opts.globaltag)
		}
		if opts.unique_cfg:
			dataset_conf_dict['pset_hash'] = md5_hex(dataset_conf_dict['pset_hash'] +
				block_info[DataProvider.Dataset])
		if dataset_conf_dict not in block_dump['dataset_conf_list']:
			block_dump['dataset_conf_list'].append(dataset_conf_dict)

		# file configurations also specifies lfn
		file_conf_dict = dict(dataset_conf_dict)
		file_conf_dict['lfn'] = lfn
		block_dump['file_conf_list'].append(file_conf_dict)

		# update block size for block summary information
		block_size += file_size
	return (block_size, dataset_type)
Code example #22
	def __init__(self, config, name):
		NamedPlugin.__init__(self, config, name)
		initSandbox = changeInitNeeded('sandbox')
		self._varCheck = validNoVar(config)

		# Task requirements
		jobs_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['jobs'], addTags = [self]) # Move this into parameter manager?
		self.wallTime = jobs_config.getTime('wall time', onChange = None)
		self.cpuTime = jobs_config.getTime('cpu time', self.wallTime, onChange = None)
		self.cpus = jobs_config.getInt('cpus', 1, onChange = None)
		self.memory = jobs_config.getInt('memory', -1, onChange = None)
		self.nodeTimeout = jobs_config.getTime('node timeout', -1, onChange = initSandbox)

		# Compute / get task ID
		self.taskID = config.get('task id', 'GC' + md5_hex(str(time()))[:12], persistent = True)
		self.taskDate = config.get('task date', strftime('%Y-%m-%d'), persistent = True, onChange = initSandbox)
		self.taskConfigName = config.getConfigName()
		self._job_name_generator = config.getPlugin('job name generator', 'DefaultJobName',
			cls = JobNamePlugin, pargs = (self,))

		# Storage setup
		storage_config = config.changeView(viewClass = 'TaggedConfigView',
			setClasses = None, setNames = None, addSections = ['storage'], addTags = [self])
		self.taskVariables = {
			# Space limits
			'SCRATCH_UL': storage_config.getInt('scratch space used', 5000, onChange = initSandbox),
			'SCRATCH_LL': storage_config.getInt('scratch space left', 1, onChange = initSandbox),
			'LANDINGZONE_UL': storage_config.getInt('landing zone space used', 100, onChange = initSandbox),
			'LANDINGZONE_LL': storage_config.getInt('landing zone space left', 1, onChange = initSandbox),
		}
		storage_config.set('se output pattern', 'job_@GC_JOB_ID@_@X@')
		self.seMinSize = storage_config.getInt('se min size', -1, onChange = initSandbox)

		self.sbInputFiles = config.getPaths('input files', [], onChange = initSandbox)
		self.sbOutputFiles = config.getList('output files', [], onChange = initSandbox)
		self.gzipOut = config.getBool('gzip output', True, onChange = initSandbox)

		self._subst_files = config.getList('subst files', [], onChange = initSandbox)
		self.dependencies = lmap(str.lower, config.getList('depends', [], onChange = initSandbox))

		# Get error messages from gc-run.lib comments
		self.errorDict = {}
		self.updateErrorDict(utils.pathShare('gc-run.lib'))

		# Init parameter source manager
		psrc_repository = {}
		self._setupJobParameters(config, psrc_repository)
		self._pfactory = config.getPlugin('internal parameter factory', 'BasicParameterFactory',
			cls = ParameterFactory, pargs = (psrc_repository,), tags = [self], inherit = True)
		self.source = config.getPlugin('parameter adapter', 'TrackedParameterAdapter',
			cls = ParameterAdapter, pargs = (self._pfactory.getSource(),))
Code example #23
	def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
		DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)

		config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['datasource %s' % datasetExpr])
		self._block = self._readBlockFromConfig(config, datasetExpr, datasetNick, datasetID)

		dataset_hash_new = md5_hex(repr(self._block))
		dataset_hash_old = config.get('dataset hash', dataset_hash_new, persistent = True)
		self._request_resync = dataset_hash_new != dataset_hash_old
		if self._request_resync:
			self._log.critical('Dataset %r changed', datasetExpr)
			config.setState(True, 'resync', detail = 'dataset')
			config.setState(True, 'resync', detail = 'parameters')
			config.set('dataset hash', dataset_hash_new)
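The pattern in this example is change detection: hash the current dataset description, compare it with the hash persisted from the previous run, and trigger a resync if they differ. A minimal file-backed sketch of the same idea (the state file name is made up):

import hashlib
import os

def md5_hex(value):  # assumed stand-in
    return hashlib.md5(value.encode('utf-8')).hexdigest()

def dataset_changed(block, state_fn='dataset.hash'):
    # Compare the hash of the current block description with the persisted
    # hash, then store the new value for the next run.
    hash_new = md5_hex(repr(block))
    hash_old = None
    if os.path.exists(state_fn):
        with open(state_fn) as fp:
            hash_old = fp.read().strip()
    with open(state_fn, 'w') as fp:
        fp.write(hash_new)
    return (hash_old is not None) and (hash_old != hash_new)

block = {'dataset': '/a/b/c', 'files': ['f1.root', 'f2.root']}
print(dataset_changed(block))  # False on the first run, True once the block changes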
Code example #24
	def __init__(self, config, name):
		NamedPlugin.__init__(self, config, name)
		initSandbox = changeInitNeeded('sandbox')
		self._varCheck = validNoVar(config)

		# Task requirements
		jobs_config = config.changeView(viewClass = 'TaggedConfigView', addSections = ['jobs'], addTags = [self]) # Move this into parameter manager?
		self.wallTime = jobs_config.getTime('wall time', onChange = None)
		self.cpuTime = jobs_config.getTime('cpu time', self.wallTime, onChange = None)
		self.cpus = jobs_config.getInt('cpus', 1, onChange = None)
		self.memory = jobs_config.getInt('memory', -1, onChange = None)
		self.nodeTimeout = jobs_config.getTime('node timeout', -1, onChange = initSandbox)

		# Compute / get task ID
		self.taskID = config.get('task id', 'GC' + md5_hex(str(time()))[:12], persistent = True)
		self.taskDate = config.get('task date', strftime('%Y-%m-%d'), persistent = True, onChange = initSandbox)
		self.taskConfigName = config.getConfigName()
		self._job_name_generator = config.getPlugin('job name generator', 'DefaultJobName',
			cls = JobNamePlugin, pargs = (self,))

		# Storage setup
		storage_config = config.changeView(viewClass = 'TaggedConfigView',
			setClasses = None, setNames = None, addSections = ['storage'], addTags = [self])
		self.taskVariables = {
			# Space limits
			'SCRATCH_UL': storage_config.getInt('scratch space used', 5000, onChange = initSandbox),
			'SCRATCH_LL': storage_config.getInt('scratch space left', 1, onChange = initSandbox),
			'LANDINGZONE_UL': storage_config.getInt('landing zone space used', 100, onChange = initSandbox),
			'LANDINGZONE_LL': storage_config.getInt('landing zone space left', 1, onChange = initSandbox),
		}
		storage_config.set('se output pattern', 'job_@GC_JOB_ID@_@X@')
		self.seMinSize = storage_config.getInt('se min size', -1, onChange = initSandbox)

		self.sbInputFiles = config.getPaths('input files', [], onChange = initSandbox)
		self.sbOutputFiles = config.getList('output files', [], onChange = initSandbox)
		self.gzipOut = config.getBool('gzip output', True, onChange = initSandbox)

		self.substFiles = config.getList('subst files', [], onChange = initSandbox)
		self.dependencies = lmap(str.lower, config.getList('depends', [], onChange = initSandbox))

		# Get error messages from gc-run.lib comments
		self.errorDict = {}
		self.updateErrorDict(utils.pathShare('gc-run.lib'))

		# Init parameter source manager
		self._setupJobParameters(config)
		self._pfactory = config.getPlugin('internal parameter factory', 'BasicParameterFactory',
			cls = ParameterFactory, tags = [self], inherit = True)
		self.source = config.getPlugin('parameter adapter', 'TrackedParameterAdapter',
			cls = ParameterAdapter, pargs = (self._pfactory.getSource(),))
Code example #25
			def processFI(fiList):
				for fi in fiList:
					urlHash = md5_hex(repr((fi[DataProvider.URL], fi[DataProvider.NEntries], fi.get(DataProvider.Metadata))))
					if urlHash in self._recordedURL:
						msg = 'Multiple occurences of URL: %r!' % fi[DataProvider.URL]
						msg += ' (This check can be configured with %r)' % 'dataset check unique url'
						if self._checkURL == DatasetUniqueMode.warn:
							self._log.warning(msg)
						elif self._checkURL == DatasetUniqueMode.abort:
							raise DatasetError(msg)
						elif self._checkURL == DatasetUniqueMode.skip:
							continue
					self._recordedURL.add(urlHash)
					recordedBlockURL.append(urlHash)
					yield fi
Code example #26
File: provider_scan.py Project: Fra-nk/grid-control
	def __init__(self, config, datasetExpr, datasetNick = None):
		ds_config = config.changeView(viewClass = 'TaggedConfigView', addNames = [md5_hex(datasetExpr)])
		basename = os.path.basename(datasetExpr)
		firstScanner = 'FilesFromLS'
		if '*' in basename:
			ds_config.set('source directory', datasetExpr.replace(basename, ''))
			ds_config.set('filename filter', basename)
		elif not datasetExpr.endswith('.dbs'):
			ds_config.set('source directory', datasetExpr)
		else:
			ds_config.set('source dataset path', datasetExpr)
			ds_config.set('filename filter', '')
			firstScanner = 'FilesFromDataProvider'
		defScanner = [firstScanner, 'MatchOnFilename', 'MatchDelimeter', 'DetermineEvents', 'AddFilePrefix']
		ScanProviderBase.__init__(self, ds_config, datasetExpr, datasetNick, defScanner)
Code example #27
File: dproc_check.py Project: Fra-nk/grid-control
			def processFI(fiList):
				for fi in fiList:
					urlHash = md5_hex(repr((fi[DataProvider.URL], fi[DataProvider.NEntries], fi.get(DataProvider.Metadata))))
					if urlHash in self._recordedURL:
						msg = 'Multiple occurences of URL: %r!' % fi[DataProvider.URL]
						msg += ' (This check can be configured with %r)' % 'dataset check unique url'
						if self._checkURL == DatasetUniqueMode.warn:
							self._log.warning(msg)
						elif self._checkURL == DatasetUniqueMode.abort:
							raise DatasetError(msg)
						elif self._checkURL == DatasetUniqueMode.skip:
							continue
					self._recordedURL.add(urlHash)
					recordedBlockURL.append(urlHash)
					yield fi
Code example #28
    def processBlock(self, block):
        # Check uniqueness of URLs
        recordedBlockURL = []
        if self._checkURL != DatasetUniqueMode.ignore:

            def processFI(fiList):
                for fi in fiList:
                    urlHash = md5_hex(
                        repr((fi[DataProvider.URL], fi[DataProvider.NEntries],
                              fi.get(DataProvider.Metadata))))
                    if urlHash in self._recordedURL:
                        msg = 'Multiple occurences of URL: %r!' % fi[
                            DataProvider.URL]
                        msg += ' (This check can be configured with %r)' % 'dataset check unique url'
                        if self._checkURL == DatasetUniqueMode.warn:
                            self._log.warning(msg)
                        elif self._checkURL == DatasetUniqueMode.abort:
                            raise DatasetError(msg)
                        elif self._checkURL == DatasetUniqueMode.skip:
                            continue
                    self._recordedURL.add(urlHash)
                    recordedBlockURL.append(urlHash)
                    yield fi

            block[DataProvider.FileList] = list(
                processFI(block[DataProvider.FileList]))
            recordedBlockURL.sort()

        # Check uniqueness of blocks
        if self._checkBlock != DatasetUniqueMode.ignore:
            blockHash = md5_hex(
                repr((block.get(DataProvider.Dataset),
                      block[DataProvider.BlockName], recordedBlockURL,
                      block[DataProvider.NEntries],
                      block[DataProvider.Locations],
                      block.get(DataProvider.Metadata))))
            if blockHash in self._recordedBlock:
                msg = 'Multiple occurences of block: "%s"!' % DataProvider.bName(
                    block)
                msg += ' (This check can be configured with %r)' % 'dataset check unique block'
                if self._checkBlock == DatasetUniqueMode.warn:
                    self._log.warning(msg)
                elif self._checkBlock == DatasetUniqueMode.abort:
                    raise DatasetError(msg)
                elif self._checkBlock == DatasetUniqueMode.skip:
                    return None
            self._recordedBlock.add(blockHash)
        return block
Code example #29
def list_parameters(opts, psource):
	(result, needGCParam) = get_parameters(opts, psource)
	enabledOutput = opts.output.split(',')
	output = lfilter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
	stored = lfilter(lambda k: k.untracked == False, output)
	untracked = lfilter(lambda k: k.untracked == True, output)

	if opts.collapse > 0:
		result_old = result
		result = {}
		result_nicks = {}
		head = [('COLLATE_JOBS', '# of jobs')]
		if 'DATASETSPLIT' in stored:
			stored.remove('DATASETSPLIT')
			if opts.collapse == 1:
				stored.append('DATASETNICK')
				head.append(('DATASETNICK', 'DATASETNICK'))
			elif opts.collapse == 2:
				head.append(('COLLATE_NICK', '# of nicks'))
		for pset in result_old:
			if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
				pset.pop('DATASETSPLIT')
			nickname = None
			if ('DATASETNICK' in pset) and (opts.collapse == 2):
				nickname = pset.pop('DATASETNICK')
			h = md5_hex(repr(lmap(pset.get, stored)))
			result.setdefault(h, []).append(pset)
			result_nicks.setdefault(h, set()).add(nickname)

		def doCollate(h):
			tmp = result[h][0]
			tmp['COLLATE_JOBS'] = len(result[h])
			tmp['COLLATE_NICK'] = len(result_nicks[h])
			return tmp
		result = lmap(doCollate, result)
	else:
		head = [('GC_JOB_ID', '#')]
		if needGCParam:
			head.append(('GC_PARAM', 'GC_PARAM'))
	if opts.active:
		head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
	if opts.visible:
		stored = opts.visible.split(',')
	head.extend(sorted(izip(stored, stored)))
	if opts.untracked:
		head.extend(sorted(imap(lambda n: (n, '(%s)' % n), ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
	utils.vprint('')
	utils.printTabular(head, result)
Code example #30
	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		ds_config = config.change_view(view_class='TaggedConfigView', add_names=[md5_hex(dataset_expr)])
		basename = os.path.basename(dataset_expr)
		scanner_first = 'FilesFromLS'
		if '*' in basename:
			ds_config.set('source directory', dataset_expr.replace(basename, ''))
			ds_config.set('filename filter', basename)
		elif not dataset_expr.endswith('.dbs'):
			ds_config.set('source directory', dataset_expr)
		else:
			ds_config.set('source dataset path', dataset_expr)
			ds_config.set('filename filter', '')
			scanner_first = 'FilesFromDataProvider'
		scanner_list_default = [scanner_first, 'MatchOnFilename', 'MatchDelimeter',
			'DetermineEvents', 'AddFilePrefix']
		ScanProviderBase.__init__(self, ds_config, datasource_name, dataset_expr,
			dataset_nick, dataset_proc, scanner_list_default)
Code example #31
File: provider_scan.py Project: whahmad/grid-control
	def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0):
		ds_config = config.changeView(viewClass = 'TaggedConfigView', addNames = [md5_hex(datasetExpr)])
		if os.path.isdir(datasetExpr):
			GCProvider.stageDir[None] = ['OutputDirsFromWork']
			ds_config.set('source directory', datasetExpr)
			datasetExpr = os.path.join(datasetExpr, 'work.conf')
		else:
			GCProvider.stageDir[None] = ['OutputDirsFromConfig', 'MetadataFromTask']
			datasetExpr, selector = utils.optSplit(datasetExpr, '%')
			ds_config.set('source config', datasetExpr)
			ds_config.set('source job selector', selector)
		ext_config = createConfig(datasetExpr)
		ext_task_name = ext_config.changeView(setSections = ['global']).get(['task', 'module'])
		if 'ParaMod' in ext_task_name: # handle old config files
			ext_task_name = ext_config.changeView(setSections = ['ParaMod']).get('module')
		sGet = lambda scannerDict: scannerDict.get(None) + scannerDict.get(ext_task_name, [])
		sList = sGet(GCProvider.stageDir) + ['JobInfoFromOutputDir', 'FilesFromJobInfo'] + sGet(GCProvider.stageFile) + ['DetermineEvents', 'AddFilePrefix']
		ScanProviderBase.__init__(self, ds_config, sList, datasetNick, datasetID)
Code example #32
    def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
        DataProvider.__init__(self, config, datasetExpr, datasetNick,
                              datasetID)

        config = config.changeView(viewClass='SimpleConfigView',
                                   setSections=['datasource %s' % datasetExpr])
        self._block = self._readBlockFromConfig(config, datasetExpr,
                                                datasetNick, datasetID)

        dataset_hash_new = md5_hex(repr(self._block))
        dataset_hash_old = config.get('dataset hash',
                                      dataset_hash_new,
                                      persistent=True)
        self._request_resync = dataset_hash_new != dataset_hash_old
        if self._request_resync:
            self._log.critical('Dataset %r changed', datasetExpr)
            config.setState(True, 'resync', detail='dataset')
            config.setState(True, 'resync', detail='parameters')
            config.set('dataset hash', dataset_hash_new)
Code example #33
 def __init__(self, config, datasetExpr, datasetNick=None):
     ds_config = config.changeView(viewClass='TaggedConfigView',
                                   addNames=[md5_hex(datasetExpr)])
     basename = os.path.basename(datasetExpr)
     firstScanner = 'FilesFromLS'
     if '*' in basename:
         ds_config.set('source directory',
                       datasetExpr.replace(basename, ''))
         ds_config.set('filename filter', basename)
     elif not datasetExpr.endswith('.dbs'):
         ds_config.set('source directory', datasetExpr)
     else:
         ds_config.set('source dataset path', datasetExpr)
         ds_config.set('filename filter', '')
         firstScanner = 'FilesFromDataProvider'
     defScanner = [
         firstScanner, 'MatchOnFilename', 'MatchDelimeter',
         'DetermineEvents', 'AddFilePrefix'
     ]
     ScanProviderBase.__init__(self, ds_config, datasetExpr, datasetNick,
                               defScanner)
Code example #34
	def bulkSubmissionBegin(self):
		self._submitParams.update({ '-d': None })
		if self._discovery_module:
			self._submitParams.update({ '-e': self._discovery_module.getWMS() })
		if self._useDelegate is False:
			self._submitParams.update({ '-a': ' ' })
			return True
		dID = 'GCD' + md5_hex(str(time.time()))[:10]
		activity = utils.ActivityLog('creating delegate proxy for job submission')
		deletegateArgs = []
		if self._configVO:
			deletegateArgs.extend(['--config', self._configVO])
		proc = LocalProcess(self._delegateExec, '-d', dID, '--noint', '--logfile', '/dev/stderr', *deletegateArgs)
		output = proc.get_output(timeout = 10, raise_errors = False)
		if ('glite-wms-job-delegate-proxy Success' in output) and (dID in output):
			self._submitParams.update({ '-d': dID })
		del activity

		if proc.status(timeout = 0, terminate = True) != 0:
			self._log.log_process(proc)
		return (self._submitParams.get('-d', None) is not None)
Code example #35
File: wms_cream.py Project: tolange/grid-control
    def _begin_bulk_submission(self):
        self._submit_args_dict.update({'-D': None})
        if self._use_delegate is False:
            self._submit_args_dict.update({'-a': ' '})
            return True
        delegate_id = 'GCD' + md5_hex(str(time.time()))[:10]
        activity = Activity('creating delegate proxy for job submission')
        delegate_arg_list = ['-e', self._ce[:self._ce.rfind("/")]]
        if self._config_fn:
            delegate_arg_list.extend(['--config', self._config_fn])
        proc = LocalProcess(self._delegate_exec, '-d', delegate_id,
                            '--logfile', '/dev/stderr', *delegate_arg_list)
        output = proc.get_output(timeout=10, raise_errors=False)
        if ('succesfully delegated to endpoint' in output) and (delegate_id
                                                                in output):
            self._submit_args_dict.update({'-D': delegate_id})
        activity.finish()

        if proc.status(timeout=0, terminate=True) != 0:
            self._log.log_process(proc)
        return self._submit_args_dict.get('-D') is not None
Code example #36
	def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
		ds_config = config.change_view(view_class='TaggedConfigView', add_names=[md5_hex(dataset_expr)])
		if os.path.isdir(dataset_expr):
			scanner_list = ['OutputDirsFromWork']
			ds_config.set('source directory', dataset_expr)
			dataset_expr = os.path.join(dataset_expr, 'work.conf')
		else:
			scanner_list = ['OutputDirsFromConfig', 'MetadataFromTask']
			dataset_expr, selector = split_opt(dataset_expr, '%')
			ds_config.set('source config', dataset_expr)
			ds_config.set('source job selector', selector)
		ext_config = create_config(dataset_expr)
		ext_task_name = ext_config.change_view(set_sections=['global']).get(['module', 'task'])
		ext_task_cls = Plugin.get_class(ext_task_name)
		for ext_task_cls in Plugin.get_class(ext_task_name).iter_class_bases():
			scan_setup_name = 'GCProviderSetup_' + ext_task_cls.__name__
			scan_setup_cls = GCProviderSetup.get_class(scan_setup_name, ignore_missing=True)
			if scan_setup_cls:
				scanner_list += scan_setup_cls.scanner_list
				break
		ScanProviderBase.__init__(self, ds_config, datasource_name, dataset_expr,
			dataset_nick, dataset_proc, scanner_list)
Code example #37
	def _begin_bulk_submission(self):
		self._submit_args_dict.update({'-d': None})
		if self._discovery_plugin:
			self._submit_args_dict.update({'-e': self._discovery_plugin.get_endpoint()})
		if self._use_delegate is False:
			self._submit_args_dict.update({'-a': ' '})
			return True
		delegate_id = 'GCD' + md5_hex(str(time.time()))[:10]
		activity = Activity('creating delegate proxy for job submission')
		delegate_arg_list = []
		if self._config_fn:
			delegate_arg_list.extend(['--config', self._config_fn])
		proc = LocalProcess(self._delegate_exec, '-d', delegate_id,
			'--noint', '--logfile', '/dev/stderr', *delegate_arg_list)
		output = proc.get_output(timeout=10, raise_errors=False)
		if ('glite-wms-job-delegate-proxy Success' in output) and (delegate_id in output):
			self._submit_args_dict.update({'-d': delegate_id})
		activity.finish()

		if proc.status(timeout=0, terminate=True) != 0:
			self._log.log_process(proc)
		return self._submit_args_dict.get('-d') is not None
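Both delegation examples derive a throwaway identifier by hashing the current timestamp and keeping a short prefix of the digest, and the task-id examples do the same with a different prefix and length. A tiny sketch of that id scheme (the function name is illustrative):

import hashlib
import time

def md5_hex(value):  # assumed stand-in
    return hashlib.md5(value.encode('utf-8')).hexdigest()

def make_unique_id(prefix='GCD', digest_len=10):
    # 'GCD' + first 10 hex digits of md5(time) for delegation ids,
    # 'GC' + first 12 digits for task ids, as seen in the examples above.
    return prefix + md5_hex(str(time.time()))[:digest_len]

print(make_unique_id())          # e.g. GCD followed by 10 hex digits
print(make_unique_id('GC', 12))  # task-id style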
Code example #38
File: dproc_check.py Project: Fra-nk/grid-control
	def processBlock(self, block):
		# Check uniqueness of URLs
		recordedBlockURL = []
		if self._checkURL != DatasetUniqueMode.ignore:
			def processFI(fiList):
				for fi in fiList:
					urlHash = md5_hex(repr((fi[DataProvider.URL], fi[DataProvider.NEntries], fi.get(DataProvider.Metadata))))
					if urlHash in self._recordedURL:
						msg = 'Multiple occurences of URL: %r!' % fi[DataProvider.URL]
						msg += ' (This check can be configured with %r)' % 'dataset check unique url'
						if self._checkURL == DatasetUniqueMode.warn:
							self._log.warning(msg)
						elif self._checkURL == DatasetUniqueMode.abort:
							raise DatasetError(msg)
						elif self._checkURL == DatasetUniqueMode.skip:
							continue
					self._recordedURL.add(urlHash)
					recordedBlockURL.append(urlHash)
					yield fi
			block[DataProvider.FileList] = list(processFI(block[DataProvider.FileList]))
			recordedBlockURL.sort()

		# Check uniqueness of blocks
		if self._checkBlock != DatasetUniqueMode.ignore:
			blockHash = md5_hex(repr((block.get(DataProvider.Dataset), block[DataProvider.BlockName],
				recordedBlockURL, block[DataProvider.NEntries],
				block[DataProvider.Locations], block.get(DataProvider.Metadata))))
			if blockHash in self._recordedBlock:
				msg = 'Multiple occurences of block: "%s"!' % DataProvider.bName(block)
				msg += ' (This check can be configured with %r)' % 'dataset check unique block'
				if self._checkBlock == DatasetUniqueMode.warn:
					self._log.warning(msg)
				elif self._checkBlock == DatasetUniqueMode.abort:
					raise DatasetError(msg)
				elif self._checkBlock == DatasetUniqueMode.skip:
					return None
			self._recordedBlock.add(blockHash)
		return block
Code example #39
	def process_block(self, block):
		# Check uniqueness of URLs
		url_hash_list = []
		if self._check_url != DatasetUniqueMode.ignore:
			block[DataProvider.FileList] = list(self._process_fi_list(url_hash_list,
				block[DataProvider.FileList]))
			url_hash_list.sort()

		# Check uniqueness of blocks
		if self._check_block != DatasetUniqueMode.ignore:
			block_hash = md5_hex(repr((block.get(DataProvider.Dataset), block[DataProvider.BlockName],
				url_hash_list, block[DataProvider.NEntries],
				block[DataProvider.Locations], block.get(DataProvider.Metadata))))
			if block_hash in self._recorded_block:
				msg = 'Multiple occurences of block: "%s"!' % DataProvider.get_block_id(block)
				msg += ' (This check can be configured with %r)' % 'dataset check unique block'
				if self._check_block == DatasetUniqueMode.warn:
					self._log.warning(msg)
				elif self._check_block == DatasetUniqueMode.abort:
					raise DatasetError(msg)
				elif self._check_block == DatasetUniqueMode.skip:
					return None
			self._recorded_block.add(block_hash)
		return block
Code example #40
File: wms_cream.py Project: tolange/grid-control
    def _get_jobs_output(self, gc_id_jobnum_list):
        # Get output of jobs and yield output dirs
        if len(gc_id_jobnum_list) == 0:
            raise StopIteration

        tmp_dn = os.path.join(self._path_output, 'tmp')
        try:
            if len(gc_id_jobnum_list) == 1:
                # For single jobs create single subdir
                tmp_dn = os.path.join(tmp_dn, md5_hex(gc_id_jobnum_list[0][0]))
            ensure_dir_exists(tmp_dn)
        except Exception:
            raise BackendError(
                'Temporary path "%s" could not be created.' % tmp_dn,
                BackendError)

        map_gc_id2jobnum = dict(gc_id_jobnum_list)
        jobnum_list_todo = list(map_gc_id2jobnum.values())
        wms_id_list_done = []
        activity = Activity('retrieving %d job outputs' %
                            len(gc_id_jobnum_list))
        chunk_pos_iter = irange(0, len(gc_id_jobnum_list), self._chunk_size)
        for ids in imap(lambda x: gc_id_jobnum_list[x:x + self._chunk_size],
                        chunk_pos_iter):
            for (current_jobnum, output_dn) in self.get_jobs_output_chunk(
                    tmp_dn, ids, wms_id_list_done):
                unpack_wildcard_tar(self._log, output_dn)
                jobnum_list_todo.remove(current_jobnum)
                yield (current_jobnum, output_dn)
        activity.finish()

        # return unretrievable jobs
        for jobnum in jobnum_list_todo:
            yield (jobnum, None)
        self._purge_done_jobs(wms_id_list_done)
        remove_files([tmp_dn])
Code example #41
File: provider_scan.py Project: Fra-nk/grid-control
	def __init__(self, config, datasetExpr, datasetNick = None):
		ds_config = config.changeView(viewClass = 'TaggedConfigView', addNames = [md5_hex(datasetExpr)])
		if os.path.isdir(datasetExpr):
			scan_pipeline = ['OutputDirsFromWork']
			ds_config.set('source directory', datasetExpr)
			datasetExpr = os.path.join(datasetExpr, 'work.conf')
		else:
			scan_pipeline = ['OutputDirsFromConfig', 'MetadataFromTask']
			datasetExpr, selector = utils.optSplit(datasetExpr, '%')
			ds_config.set('source config', datasetExpr)
			ds_config.set('source job selector', selector)
		ext_config = create_config(datasetExpr)
		ext_task_name = ext_config.changeView(setSections = ['global']).get(['module', 'task'])
		if 'ParaMod' in ext_task_name: # handle old config files
			ext_task_name = ext_config.changeView(setSections = ['ParaMod']).get('module')
		ext_task_cls = Plugin.getClass(ext_task_name)
		for ext_task_cls in Plugin.getClass(ext_task_name).iterClassBases():
			try:
				scan_holder = GCProviderSetup.getClass('GCProviderSetup_' + ext_task_cls.__name__)
			except PluginError:
				continue
			scan_pipeline += scan_holder.scan_pipeline
			break
		ScanProviderBase.__init__(self, ds_config, datasetExpr, datasetNick, scan_pipeline)
Code example #42
    def __init__(self, config,
                 name):  # Read configuration options and init vars
        NamedPlugin.__init__(self, config, name)
        init_sandbox = TriggerInit('sandbox')
        self._var_checker = NoVarCheck(config)

        # Task requirements
        # Move this into parameter manager?
        jobs_config = config.change_view(view_class='TaggedConfigView',
                                         add_sections=['jobs'],
                                         add_tags=[self])
        self.wall_time = jobs_config.get_time('wall time', on_change=None)
        self._cpu_time = jobs_config.get_time('cpu time',
                                              self.wall_time,
                                              on_change=None)
        self._cores = jobs_config.get_int(['cores', 'cpus'], 1, on_change=None)
        self._memory = jobs_config.get_int('memory', -1, on_change=None)
        self._job_timeout = jobs_config.get_time('node timeout',
                                                 -1,
                                                 on_change=init_sandbox)

        # Compute / get task ID
        self._task_id = config.get('task id',
                                   'GC' + md5_hex(str(time.time()))[:12],
                                   persistent=True)
        self._task_date = config.get('task date',
                                     time.strftime('%Y-%m-%d'),
                                     persistent=True,
                                     on_change=init_sandbox)
        self._task_time = config.get('task time',
                                     time.strftime('%H%M%S'),
                                     persistent=True,
                                     on_change=init_sandbox)
        task_name_generator = config.get_plugin('task name generator',
                                                'DefaultTaskName',
                                                cls=TaskNamePlugin)
        self._task_name = task_name_generator.get_name(self)
        self._task_config_name = config.get_config_name()
        self._job_name_generator = config.get_plugin('job name generator',
                                                     'DefaultJobName',
                                                     cls=JobNamePlugin)

        # Storage setup
        storage_config = config.change_view(view_class='TaggedConfigView',
                                            set_classes=None,
                                            set_names=None,
                                            add_sections=['storage'],
                                            add_tags=[self])
        scratch_space_used = storage_config.get_int('scratch space used',
                                                    5000,
                                                    on_change=init_sandbox)
        lz_space_used = storage_config.get_int('landing zone space used',
                                               100,
                                               on_change=init_sandbox)
        self._task_var_dict = {
            # Space limits
            'SCRATCH_UL':
            scratch_space_used,
            'SCRATCH_LL':
            storage_config.get_int('scratch space left',
                                   1,
                                   on_change=init_sandbox),
            'LANDINGZONE_UL':
            lz_space_used,
            'LANDINGZONE_LL':
            storage_config.get_int('landing zone space left',
                                   1,
                                   on_change=init_sandbox),
        }
        storage_config.set('se output pattern', 'job_@GC_JOB_ID@_@X@')
        self._se_min_size = storage_config.get_int('se min size',
                                                   -1,
                                                   on_change=init_sandbox)
        self._disk_min = max(scratch_space_used, lz_space_used)

        self._sb_in_fn_list = config.get_path_list('input files', [],
                                                   on_change=init_sandbox)
        self._sb_out_fn_list = config.get_list('output files', [],
                                               on_change=init_sandbox)
        self._do_gzip_std_output = config.get_bool('gzip output',
                                                   True,
                                                   on_change=init_sandbox)

        self._subst_files = config.get_list('subst files', [],
                                            on_change=init_sandbox)
        self._dependencies = lmap(
            str.lower, config.get_list('depends', [], on_change=init_sandbox))

        # Get error messages from gc-run.lib comments
        self.map_error_code2msg = {}
        self._update_map_error_code2msg(get_path_share('gc-run.lib'))

        # Init parameter source manager
        psrc_repository = {}
        self._setup_repository(config, psrc_repository)
        pfactory = config.get_plugin('internal parameter factory',
                                     'BasicParameterFactory',
                                     cls=ParameterFactory,
                                     bind_kwargs={
                                         'tags': [self],
                                         'inherit': True
                                     })
        self._source = config.get_plugin(
            'parameter adapter',
            'TrackedParameterAdapter',
            cls=ParameterAdapter,
            pargs=(pfactory.get_psrc(psrc_repository), ))
        self._log.log(logging.DEBUG3, 'Using parameter adapter %s',
                      repr(self._source))

        self._log.info('Current task ID: %s', self._task_id)
        self._log.info('Task started on: %s', self._task_date)
Code example #43
File: plugin_graph.py Project: Fra-nk/grid-control
def getNodeName(instance):
	return instance.__class__.__name__ + '_' + md5_hex(repr(hash(instance)))
Code example #44
	def _get_hash(self, keys, metadata_dict, hash_seed):
		return md5_hex(repr(hash_seed) + repr(lmap(metadata_dict.get, keys)))
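_get_hash mixes a seed with the values of the selected metadata keys, so the digest only changes when the seed or one of those values changes. A quick standalone check of that behaviour (the metadata keys below are examples):

import hashlib

def md5_hex(value):  # assumed stand-in
    return hashlib.md5(value.encode('utf-8')).hexdigest()

def get_hash(keys, metadata_dict, hash_seed):
    # Same shape as _get_hash above, using a list comprehension instead of lmap.
    return md5_hex(repr(hash_seed) + repr([metadata_dict.get(key) for key in keys]))

md_a = {'CMSSW_VERSION': 'CMSSW_10_6_0', 'RUN': 1}
md_b = {'CMSSW_VERSION': 'CMSSW_10_6_0', 'RUN': 2}
print(get_hash(['CMSSW_VERSION'], md_a, 'seed') == get_hash(['CMSSW_VERSION'], md_b, 'seed'))  # True
print(get_hash(['RUN'], md_a, 'seed') == get_hash(['RUN'], md_b, 'seed'))  # False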
Code example #45
	def getHash(self):
		return md5_hex(self._psource.getHash() + str([self._posStart, self._posEnd]))
Code example #46
	def getHash(self):
		return md5_hex(self._psource.getHash() + str(self.times))
Code example #47
	def getHash(self):
		return md5_hex(str(lmap(lambda p: str(p.getMaxParameters()) + p.getHash(), self._psourceList)))
Code example #48
File: provider_scan.py Project: Fra-nk/grid-control
	def _generateKey(self, keys, base, path, metadata, events, seList, objStore):
		return md5_hex(repr(base) + repr(lmap(metadata.get, keys)))
Code example #49
	def getHash(self):
		return md5_hex(str(self._srcName) + str(self._dataSplitter.getMaxJobs()) + str(self.resyncEnabled()))
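The last few getHash examples compose hashes hierarchically: each wrapper feeds the hash of its wrapped parameter source plus its own state back into md5_hex, so a change anywhere in the chain changes the top-level hash. A self-contained sketch of that composition (class names here are illustrative):

import hashlib

def md5_hex(value):  # assumed stand-in
    return hashlib.md5(value.encode('utf-8')).hexdigest()

class LeafSource(object):
    def __init__(self, state):
        self._state = state

    def get_hash(self):
        return md5_hex(repr(self._state))

class RangeSource(object):
    # Wrapper mixing its own slice boundaries into the wrapped source's hash,
    # like the [_posStart, _posEnd] example above.
    def __init__(self, psource, pos_start, pos_end):
        (self._psource, self._pos_start, self._pos_end) = (psource, pos_start, pos_end)

    def get_hash(self):
        return md5_hex(self._psource.get_hash() + str([self._pos_start, self._pos_end]))

print(RangeSource(LeafSource('seeds'), 0, 10).get_hash() !=
      RangeSource(LeafSource('seeds'), 0, 20).get_hash())  # True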