Ejemplo n.º 1
0
	def _resyncInternal(self): # This function is _VERY_ time critical!
		"""Resync the job <-> parameter mapping with the raw parameter source.

		Sets self._resyncState to None and returns early if the raw source reports
		no changes and its hash is unchanged. Otherwise the old parameter dump is
		compared with the current raw source, self._source and self._mapJob2PID are
		rebuilt and both the mapping and the parameter dump are rewritten on disk.
		"""
		tmp = self._rawSource.resync() # First ask about psource changes
		(redoNewPNum, disableNewPNum, sizeChange) = (set(tmp[0]), set(tmp[1]), tmp[2])
		# Detect hash changes of the raw source and remember the new hash
		hashNew = self._rawSource.getHash()
		hashChange = self._storedHash != hashNew
		self._storedHash = hashNew
		if not (redoNewPNum or disableNewPNum or sizeChange or hashChange):
			self._resyncState = None
			return

		# old: parameters as dumped in self._pathParams, new: current raw source
		psource_old = ParameterAdapter(None, ParameterSource.createInstance('GCDumpParameterSource', self._pathParams))
		psource_new = ParameterAdapter(None, self._rawSource)

		# mapJob2PID is handed to _diffParams, presumably to be filled in-place - TODO confirm
		mapJob2PID = {}
		(pAdded, pMissing, _) = self._diffParams(psource_old, psource_new, mapJob2PID, redoNewPNum, disableNewPNum)
		self._source = self._getResyncSource(psource_old, psource_new, mapJob2PID, pAdded, pMissing, disableNewPNum)

		self._mapJob2PID = mapJob2PID # Update Job2PID map
		# Disabled parameter numbers are never reported as "redo"
		redoNewPNum = redoNewPNum.difference(disableNewPNum)
		# NOTE(review): if only the hash changed (no redo / disable / size change),
		# neither branch below assigns self._resyncState - it keeps its previous value
		if redoNewPNum or disableNewPNum:
			# Translate parameter numbers via the inverted map (unmapped numbers are kept as-is)
			mapPID2Job = dict(ismap(utils.swap, self._mapJob2PID.items()))
			translate = lambda pNum: mapPID2Job.get(pNum, pNum)
			self._resyncState = (set(imap(translate, redoNewPNum)), set(imap(translate, disableNewPNum)), sizeChange)
		elif sizeChange:
			self._resyncState = (set(), set(), sizeChange)
		# Write resynced state
		# Both files are written to '.tmp' names first and then renamed over the old files
		self._writeJob2PID(self._pathJob2PID + '.tmp')
		ParameterSource.getClass('GCDumpParameterSource').write(self._pathParams + '.tmp', self)
		os.rename(self._pathJob2PID + '.tmp', self._pathJob2PID)
		os.rename(self._pathParams + '.tmp', self._pathParams)
Ejemplo n.º 2
0
	def _resync_adapter(self, pa_old, pa_new, result_redo, result_disable, size_change):
		"""Rebuild the job number -> parameter number map from the old and new adapters.

		Updates self._map_jobnum2pnum (and self._psrc if parameter space points went
		missing), writes the map and the parameter dump to disk and returns the tuple
		(redo job numbers, disable job numbers, size_change).
		"""
		diff_result = _diff_pspi_list(pa_old, pa_new, result_redo, result_disable)
		(jobnum2pnum, added_list, missing_list) = diff_result
		# Reorder and reconstruct parameter space with the following layout:
		# NNNNNNNNNNNNN OOOOOOOOO | source: NEW (==self) and OLD (==from file)
		# <same><added> <missing> | same: both in NEW and OLD, added: only in NEW, missing: only in OLD
		if added_list:
			_extend_map_jobnum2pnum(jobnum2pnum, pa_old.get_job_len(), added_list)
		if missing_list:
			# placeholders stand in for the parameter space points that went missing
			placeholder_psrc = _create_placeholder_psrc(pa_old, pa_new,
				jobnum2pnum, missing_list, result_disable)
			self._psrc = ParameterSource.create_instance('ChainParameterSource',
				self._psrc_raw, placeholder_psrc)
		self._map_jobnum2pnum = jobnum2pnum

		# Persist the resynced state: write to temporary names, then rename into place
		tmp_path_map = self._path_jobnum2pnum + '.tmp'
		tmp_path_params = self._path_params + '.tmp'
		self._write_jobnum2pnum(tmp_path_map)
		dump_cls = ParameterSource.get_class('GCDumpParameterSource')
		dump_cls.write(tmp_path_params,
			self.get_job_len(), self.get_job_metadata(), self.iter_jobs())
		os.rename(tmp_path_map, self._path_jobnum2pnum)
		os.rename(tmp_path_params, self._path_params)

		# Disabled entries are not reported as redo
		pending_redo = result_redo.difference(result_disable)
		if not (pending_redo or result_disable):
			return (set(), set(), size_change)
		# Translate parameter numbers to job numbers (unknown numbers are kept as-is)
		pnum2jobnum = reverse_dict(self._map_jobnum2pnum)
		redo_jobs = set(pnum2jobnum.get(pnum, pnum) for pnum in pending_redo)
		disable_jobs = set(pnum2jobnum.get(pnum, pnum) for pnum in result_disable)
		return (redo_jobs, disable_jobs, size_change)
Ejemplo n.º 3
0
def createLookupHelper(pconfig, var_list, lookup_list):
	"""Return a list of (doElevate, PSourceClass, arguments) entries for the given variables.

	With several (or zero) variables the entries of each single variable are
	concatenated. For a single variable the kind of entry depends on the value
	returned by pconfig.getParameter for the variable name without leading '!'.
	"""
	if len(var_list) != 1: # multi-lookup handling
		entries = []
		for single_var in var_list:
			entries += createLookupHelper(pconfig, [single_var], lookup_list)
		return entries

	var_name = var_list[0]
	pvalue = pconfig.getParameter(var_name.lstrip('!'))

	# plain value lists do not need any lookup
	if isinstance(pvalue, list):
		if len(pvalue) != 1:
			return [(False, ParameterSource.getClass('SimpleParameterSource'), [var_name, pvalue])]
		return [(False, ParameterSource.getClass('ConstParameterSource'), [var_name, pvalue[0]])]
	if isinstance(pvalue, tuple) and pvalue[0] == 'format':
		return [(False, ParameterSource.getClass('FormatterParameterSource'), pvalue[1:])]

	# default lookup key is only available if lookup variables were given
	if lookup_list:
		lookup_key = KeyParameterSource(*lookup_list)
	else:
		lookup_key = None

	# Determine kind of lookup, [3] == lookupDictConfig, [0] == lookupContent
	parsed = lookupConfigParser(pconfig, KeyParameterSource(var_name), lookup_key)
	lookupContent = parsed[3][0]
	entry_sizes = [len(value) for value in lookupContent.values()]
	smallest = min(entry_sizes)
	if (smallest == 1) and (max(entry_sizes) == 1): # simple lookup sufficient for this setup
		return [(False, SimpleLookupParameterSource, list(parsed))]
	# switch needs elevation beyond local scope
	return [(True, SwitchingLookupParameterSource, list(parsed))]
Ejemplo n.º 4
0
 def _tree2expr(self, node, repository):
     """Turn an expression tree node into a parameter source (ints are returned as-is).

     Tuple nodes are (operator, args) pairs; any other non-int node is handed
     to 'InternalAutoParameterSource'. Raises APIError for tuple nodes with an
     operator that is neither '[]' nor listed in one of the operator maps.
     """
     if isinstance(node, int):
         return node
     if not isinstance(node, tuple):
         return ParameterSource.create_instance(
             'InternalAutoParameterSource', self._parameter_config,
             repository, node)

     (operator, operands) = node
     if operator == '[]':
         # one auto parameter source per output name, all crossed together
         lookup_sources = [
             ParameterSource.create_instance(
                 'InternalAutoParameterSource', self._parameter_config,
                 repository, output_vn, _tree2names(operands[1]))
             for output_vn in _tree2names(operands[0])]
         return ParameterSource.create_psrc_safe(
             'CrossParameterSource', self._parameter_config, repository,
             *lookup_sources)
     if operator in self._operator_map_raw:
         # raw operators receive their operands without evaluation
         return ParameterSource.create_psrc_safe(
             self._operator_map_raw[operator], self._parameter_config,
             repository, *operands)
     if operator in self._operator_map_eval:
         # eval operators receive recursively translated operands
         evaluated = [self._tree2expr(child, repository) for child in operands]
         return ParameterSource.create_psrc_safe(
             self._operator_map_eval[operator], self._parameter_config,
             repository, *evaluated)
     raise APIError('Unable to parse node %s!' % repr(node))
Ejemplo n.º 5
0
 def _createAggregatedSource(self, psource_old, psource_new, missingInfos):
     """Chain the raw source with an internal source holding the missing entries.

     The internal source is created from missingInfos and those job keys of
     psource_old that are not among the job keys of psource_new.
     """
     keys_new = psource_new.getJobKeys()
     keys_only_old = [key for key in psource_old.getJobKeys()
                      if key not in keys_new]
     missing_source = ParameterSource.createInstance(
         'InternalParameterSource', missingInfos, keys_only_old)
     return ParameterSource.createInstance(
         'ChainParameterSource', self._rawSource, missing_source)
Ejemplo n.º 6
0
    def __init__(self, config, source):
        """Set up the adapter and decide between init, resync and reuse of the stored mapping.

        - init: write the current job map and parameter dump to the work directory
        - resync: call self.resync() and keep the result in self._resyncState
        - otherwise: read the stored job map and return without touching the stored hash
        """
        self._rawSource = source
        BasicParameterAdapter.__init__(self, config, source)
        self._mapJob2PID = {}
        # Make sure the work directory exists before deriving the storage paths
        if not os.path.isdir(config.getWorkPath()):
            os.makedirs(config.getWorkPath())
        self._pathJob2PID = config.getWorkPath('params.map.gz')
        self._pathParams = config.getWorkPath('params.dat.gz')

        # Find out if init should be performed - overrides userResync!
        userInit = config.getState('init', detail='parameters')
        needInit = False
        if not (os.path.exists(self._pathParams)
                and os.path.exists(self._pathJob2PID)):
            needInit = True  # Init needed if no parameter log exists
        # Ask before overwriting an existing mapping (only if the source reports a parameter count)
        if userInit and not needInit and (source.getMaxParameters()
                                          is not None):
            utils.eprint(
                'Re-Initialization will overwrite the current mapping between jobs and parameter/dataset content! This can lead to invalid results!'
            )
            if utils.getUserBool(
                    'Do you want to perform a syncronization between the current mapping and the new one to avoid this?',
                    True):
                userInit = False
        doInit = userInit or needInit

        # Find out if resync should be performed
        userResync = config.getState('resync', detail='parameters')
        config.setState(False, 'resync', detail='parameters')
        needResync = False
        pHash = self._rawSource.getHash()
        # The current hash is passed as default value for the 'parameter hash' option
        self.storedHash = config.get('parameter hash', pHash, persistent=True)
        if self.storedHash != pHash:
            needResync = True  # Resync needed if parameters have changed
            self._log.info('Parameter hash has changed')
            self._log.debug('\told hash: %s', self.storedHash)
            self._log.debug('\tnew hash: %s', pHash)
            config.setState(True, 'init', detail='config')
        # Init takes precedence over resync
        doResync = (userResync or needResync) and not doInit

        if not doResync and not doInit:  # Reuse old mapping
            activity = utils.ActivityLog(
                'Loading cached parameter information')
            self.readJob2PID()
            activity.finish()
            return
        elif doResync:  # Perform sync
            activity = utils.ActivityLog('Syncronizing parameter information')
            # The stored hash is cleared before calling resync
            self.storedHash = None
            self._resyncState = self.resync()
            activity.finish()
        elif doInit:  # Write current state
            self.writeJob2PID(self._pathJob2PID)
            ParameterSource.getClass('GCDumpParameterSource').write(
                self._pathParams, self)
        # Only reached after init or resync: store the current hash of the raw source
        config.set('parameter hash', self._rawSource.getHash())
Ejemplo n.º 7
0
	def __new__(cls, pconfig, repository, output_vn, lookup_vn_list=None):
		"""Create the parameter source matching the configured value of output_vn.

		The value is read via pconfig.get_parameter (variable name without leading '!').
		Lists become const / simple parameter sources, tuples starting with a str are
		used as create_instance arguments, everything else becomes an auto lookup source.
		"""
		value = pconfig.get_parameter(output_vn.lstrip('!'))
		if isinstance(value, list):
			# Simplify single value parameters to const parameters
			if len(value) == 1:
				return ParameterSource.create_instance('ConstParameterSource', output_vn, value[0])
			return ParameterSource.create_instance('SimpleParameterSource', output_vn, value)
		if isinstance(value, tuple) and isinstance(value[0], str):
			return ParameterSource.create_instance(*value)
		return ParameterSource.create_instance('InternalAutoLookupParameterSource',
			pconfig, output_vn, lookup_vn_list)
Ejemplo n.º 8
0
	def getSource(self):
		"""Assemble the complete parameter source from all registered parts.

		Const sources, the factory source and lookup sources are zipped, wrapped by
		the nested sources, optionally zipped with a requirement source, passed
		through _useAvailableDataSource and finally wrapped in a repeat source.
		"""
		base_sources = self._constSources + [self._pfactory.getSource()] + self._lookupSources
		psource = ParameterSource.createInstance('ZipLongParameterSource', *base_sources)
		# each nested source wraps the result of the previous step
		for (nested_cls, nested_args) in self._nestedSources:
			psource = nested_cls(psource, *nested_args)
		if self._req:
			psource = ParameterSource.createInstance('ZipLongParameterSource', psource,
				ParameterSource.createInstance('RequirementParameterSource'))
		psource = self._useAvailableDataSource(psource)
		return ParameterSource.createInstance('RepeatParameterSource', psource, self._repeat)
Ejemplo n.º 9
0
	def __init__(self, config, source):
		"""Set up the adapter and decide between init, resync and reuse of the stored mapping.

		- init: write the current job map and parameter dump to the work directory
		- resync: call self.resync(force=True) and keep the result in self._resync_state
		- otherwise: read the stored job map and return without setting the parameter hash
		"""
		self._psrc_raw = source
		BasicParameterAdapter.__init__(self, config, source)
		self._map_jobnum2pnum = {}
		ensure_dir_exists(config.get_work_path(), 'parameter storage directory', ParameterError)
		self._path_jobnum2pnum = config.get_work_path('params.map.gz')
		self._path_params = config.get_work_path('params.dat.gz')

		# Find out if init should be performed - overrides resync_requested!
		init_requested = config.get_state('init', detail='parameters')
		init_needed = False
		if not (os.path.exists(self._path_params) and os.path.exists(self._path_jobnum2pnum)):
			init_needed = True  # Init needed if no parameter log exists
		# Ask before overwriting an existing mapping (only if the source reports a parameter length)
		if init_requested and not init_needed and (source.get_parameter_len() is not None):
			self._log.warning('Re-Initialization will overwrite the current mapping ' +
				'between jobs and parameter/dataset content! This can lead to invalid results!')
			user_msg = ('Do you want to perform a syncronization between ' +
				'the current mapping and the new one to avoid this?')
			if UserInputInterface().prompt_bool(user_msg, True):
				init_requested = False
		do_init = init_requested or init_needed

		# Find out if resync should be performed
		resync_by_user = config.get_state('resync', detail='parameters')
		config.set_state(False, 'resync', detail='parameters')
		psrc_hash = self._psrc_raw.get_psrc_hash()
		# The current hash is passed as default value for the 'parameter hash' option
		self._psrc_hash_stored = config.get('parameter hash', psrc_hash, persistent=True)
		psrc_hash_changed = self._psrc_hash_stored != psrc_hash  # Resync if parameters have changed
		resync_by_psrc = self._psrc_raw.get_resync_request()

		if do_init:  # Write current state
			self._write_jobnum2pnum(self._path_jobnum2pnum)
			ParameterSource.get_class('GCDumpParameterSource').write(self._path_params,
				self.get_job_len(), self.get_job_metadata(), self.iter_jobs())
		elif resync_by_user or resync_by_psrc or psrc_hash_changed:  # Perform sync
			# Only the first matching reason is logged (hash change > source request > user request)
			if psrc_hash_changed:
				self._log.info('Parameter hash has changed')
				self._log.debug('\told hash: %s', self._psrc_hash_stored)
				self._log.debug('\tnew hash: %s', psrc_hash)
				self._log.log(logging.DEBUG1, '\tnew src: %s', self._psrc_raw)
				config.set_state(True, 'init', detail='config')
			elif resync_by_psrc:
				self._log.info('Parameter source requested resync')
				self._log.debug('\t%r', str.join(', ', imap(repr, resync_by_psrc)))
			elif resync_by_user:
				self._log.info('User requested resync')
			# The stored hash is cleared before the forced resync
			self._psrc_hash_stored = None
			self._resync_state = self.resync(force=True)
		else:  # Reuse old mapping
			activity = Activity('Loading cached parameter information')
			self._read_jobnum2pnum()
			activity.finish()
			return  # do not set parameter hash in config
		# Only reached after init or resync: store the current hash of the raw source
		config.set('parameter hash', self._psrc_raw.get_psrc_hash())
Ejemplo n.º 10
0
	def _createRef(self, arg):
		"""Create the parameter source referenced by arg (either a dataset or a csv source).

		The reference type is read from the option 'type' of arg; it defaults to
		'dataset' if arg is listed in DataParameterSource.datasetsAvailable, else 'csv'.
		Raises APIError for any other reference type.
		"""
		data_cls = ParameterSource.getClass('DataParameterSource')
		if arg in data_cls.datasetsAvailable:
			default_type = 'dataset'
		else:
			default_type = 'csv'
		ref_type = self._paramConfig.get(arg, 'type', default_type)
		if ref_type == 'csv':
			return ParameterSource.getClass('CSVParameterSource').create(self._paramConfig, arg)
		if ref_type == 'dataset':
			return data_cls.create(self._paramConfig, arg)
		raise APIError('Unknown reference type: "%s"' % ref_type)
Ejemplo n.º 11
0
	def _combineSources(self, clsName, args):
		"""Combine the non-integer entries of args with clsName, folding integers into a repeat.

		Returns the combined source (wrapped in a repeat source if the product of all
		integers exceeds 1), the bare repeat count if only integers > 1 were given,
		or a NullParameterSource otherwise.
		"""
		# integer entries act as repetition factors
		repeat = 1
		for entry in args:
			if isinstance(entry, int):
				repeat = repeat * entry
		psources = [entry for entry in args if not isinstance(entry, int)]
		if psources:
			combined = ParameterSource.createInstance(clsName, *psources)
			if repeat > 1:
				return ParameterSource.createInstance('RepeatParameterSource', combined, repeat)
			return combined
		if repeat > 1:
			return repeat
		return NullParameterSource()
Ejemplo n.º 12
0
	def __init__(self, fn):
		"""Load a gzip compressed parameter dump from the file fn.

		The first line (without leading '#' and surrounding whitespace) is evaluated
		to get self.keys. Each further line has the form '<pNum>[!]<TAB><value>...';
		it is stored in self.values as (has '!' marker, pNum, list of evaluated values).
		Lines starting with '#' yield None entries in self.values.
		"""
		ParameterSource.__init__(self)
		fp = gzip.open(fn, 'rb')
		try: # make sure the file handle is released even if parsing fails
			keyline = fp.readline().lstrip('#').strip()
			self.keys = []
			if keyline:
				# NOTE(review): eval executes arbitrary expressions from the file -
				# only safe as long as the dump file is trusted
				self.keys = eval(keyline)
			def parseLine(line):
				if not line.startswith('#'):
					pNumStr, stored = map(str.strip, line.split('\t', 1))
					return ('!' in pNumStr, int(pNumStr.rstrip('!')), map(eval, stored.split('\t')))
			self.values = map(parseLine, fp.readlines())
		finally:
			fp.close()
Ejemplo n.º 13
0
	def _resync(self):  # This function is _VERY_ time critical!
		"""Query the raw source for changes and resync the adapter if anything changed.

		Returns the empty resync result if the raw source reports no changes and its
		hash matches the stored one; otherwise returns the result of _resync_adapter.
		"""
		raw_result = self._psrc_raw.resync_psrc()  # First ask about psrc changes
		result_redo = set(raw_result[0])
		result_disable = set(raw_result[1])
		size_change = raw_result[2]

		# remember the new hash - independent of the outcome
		hash_now = self._psrc_raw.get_psrc_hash()
		hash_differs = self._psrc_hash_stored != hash_now
		self._psrc_hash_stored = hash_now

		if result_redo or result_disable or size_change or hash_differs:
			# compare the dumped (old) state with the current raw source (new)
			dump_psrc = ParameterSource.create_instance('GCDumpParameterSource', self._path_params)
			return self._resync_adapter(ParameterAdapter(None, dump_psrc),
				ParameterAdapter(None, self._psrc_raw), result_redo, result_disable, size_change)
		return ParameterSource.get_empty_resync_result()
Ejemplo n.º 14
0
	def _addConstantPSource(self, config, cName, varName):
		"""Register a const source or - if '<cName> lookup' is set - a simple lookup source.

		Without lookup variable the stripped config value of cName is appended to
		self.constSources as const source. Otherwise the dictionary stored in cName
		is converted to tuple keys / values and appended to self.lookupSources.
		"""
		lookupVar = config.get('%s lookup' % cName, '', onChange = None)
		if not lookupVar:
			const_value = config.get(cName).strip()
			self.constSources.append(
				ParameterSource.createInstance('ConstParameterSource', varName, const_value))
			return

		matcher_name = config.get('%s matcher' % cName, 'start', onChange = None)
		matcher = Matcher.createInstance(matcher_name, config, cName)
		content = config.getDict(cName, {}, onChange = None)
		# the lookup source gets single-element tuples for keys and values
		lookup_order = [(key,) for key in content[1]]
		lookup_dict = dict(((key,), (content[0][key],)) for key in content[0])
		lookup_source = ParameterSource.createInstance('SimpleLookupParameterSource',
			varName, [lookupVar], [matcher], (lookup_dict, lookup_order))
		self.lookupSources.append(lookup_source)
Ejemplo n.º 15
0
	def resync(self, force = False): # Do not overwrite resync results - eg. from external or init trigger
		"""Return the pending resync result and reset the stored one to the empty result.

		A new resync is only performed if no result is pending and either the hash
		of the source changed or force is set. Raises ParameterError if _resync fails.
		"""
		current_hash = self._source.getHash()
		nothing_pending = (self._resync_state == ParameterSource.EmptyResyncResult())
		if nothing_pending and ((current_hash != self._source_hash) or force):
			activity = Activity('Syncronizing parameter information')
			started = time.time()
			try:
				self._resync_state = self._resync()
			except Exception:
				raise ParameterError('Unable to resync parameters!')
			# the hash is read again after the resync
			self._source_hash = self._source.getHash()
			activity.finish()
			elapsed = time.time() - started
			self._log.log(logging.INFO, 'Finished resync of parameter source (%s)', strTimeShort(elapsed))
		pending_result = self._resync_state
		self._resync_state = ParameterSource.EmptyResyncResult()
		return pending_result
Ejemplo n.º 16
0
	def __init__(self, dataDir, srcName, dataProvider, dataSplitter, dataProc):
		"""Set up the dataset parameter source and load or create the dataset splitting.

		If both 'cache.dat' and 'map.tar' exist in the data path, the stored splitter
		state is imported; otherwise the provider state is saved and the dataset is
		split from scratch. A false dataProvider skips both steps.
		"""
		ParameterSource.__init__(self)
		(self.dataDir, self.srcName, self.dataProvider, self.dataSplitter, self.dataProc) = \
			(dataDir, srcName, dataProvider, dataSplitter, dataProc)

		if not dataProvider:
			pass # debug mode - used by scripts - disables resync
		# Both files have to exist - "exists(a and b)" would only check the second path
		elif os.path.exists(self.getDataPath('cache.dat')) and os.path.exists(self.getDataPath('map.tar')):
			self.dataSplitter.importState(self.getDataPath('map.tar'))
		else:
			self.dataProvider.saveState(self.getDataPath('cache.dat'))
			self.dataSplitter.splitDataset(self.getDataPath('map.tar'), self.dataProvider.getBlocks())

		self.maxN = self.dataSplitter.getMaxJobs()
		self.keepOld = True
Ejemplo n.º 17
0
	def __init__(self, dataDir, srcName, dataProvider, dataSplitter, dataProc, keepOld = True):
		"""Set up the dataset parameter source and load or create the dataset partitions.

		If both 'cache.dat' and 'map.tar' exist in the data path, the stored partitions
		are imported; otherwise the dataset blocks are saved and split from scratch.
		A false dataProvider skips both steps. keepOld is stored in self._keepOld.
		"""
		ParameterSource.__init__(self)
		(self._dataDir, self._srcName, self._dataProvider, self._dataSplitter, self._part_proc) = \
			(dataDir, srcName, dataProvider, dataSplitter, dataProc)

		if not dataProvider:
			pass # debug mode - used by scripts - disables resync
		# Both files have to exist - "exists(a and b)" would only check the second path
		elif os.path.exists(self.getDataPath('cache.dat')) and os.path.exists(self.getDataPath('map.tar')):
			self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
		else:
			DataProvider.saveToFile(self.getDataPath('cache.dat'), self._dataProvider.getBlocks(silent = False))
			self._dataSplitter.splitDataset(self.getDataPath('map.tar'), self._dataProvider.getBlocks())

		self._maxN = self._dataSplitter.getMaxJobs()
		self._keepOld = keepOld
Ejemplo n.º 18
0
	def __init__(self, fn):
		"""Read a parameter dump from fn into self._keys and self._values.

		The first line (without leading '#') holds the JSON encoded key list. Every
		other line is parsed into (has '!' marker, parameter number, list of values);
		lines starting with '#' result in None entries.
		"""
		ParameterSource.__init__(self)
		fp = ZipFile(fn, 'r')
		try:
			header = fp.readline().lstrip('#').strip()
			self._keys = []
			if header:
				self._keys = parseJSON(header)

			def parseLine(line):
				if line.startswith('#'):
					return None
				(pNumStr, stored) = lmap(str.strip, line.split('\t', 1))
				is_marked = '!' in pNumStr
				pNum = int(pNumStr.rstrip('!'))
				return (is_marked, pNum, lmap(parseJSON, stored.split('\t')))

			self._values = [parseLine(line) for line in fp.readlines()]
		finally:
			fp.close()
Ejemplo n.º 19
0
	def __init__(self, config, source):
		"""Set up the adapter and decide between init, resync and reuse of the stored mapping.

		- init: write the current job map and parameter dump to the work directory
		- resync: call self.resync() and keep the result in self._resyncState
		- otherwise: read the stored job map and return without touching the stored hash
		"""
		self._rawSource = source
		BasicParameterAdapter.__init__(self, config, source)
		self._mapJob2PID = {}
		# Make sure the work directory exists before deriving the storage paths
		if not os.path.isdir(config.getWorkPath()):
			os.makedirs(config.getWorkPath())
		self._pathJob2PID = config.getWorkPath('params.map.gz')
		self._pathParams = config.getWorkPath('params.dat.gz')

		# Find out if init should be performed - overrides userResync!
		userInit = config.getState('init', detail = 'parameters')
		needInit = False
		if not (os.path.exists(self._pathParams) and os.path.exists(self._pathJob2PID)):
			needInit = True # Init needed if no parameter log exists
		# Ask before overwriting an existing mapping (only if the source reports a parameter count)
		if userInit and not needInit and (source.getMaxParameters() is not None):
			utils.eprint('Re-Initialization will overwrite the current mapping between jobs and parameter/dataset content! This can lead to invalid results!')
			if utils.getUserBool('Do you want to perform a syncronization between the current mapping and the new one to avoid this?', True):
				userInit = False
		doInit = userInit or needInit

		# Find out if resync should be performed
		userResync = config.getState('resync', detail = 'parameters')
		config.setState(False, 'resync', detail = 'parameters')
		needResync = False
		pHash = self._rawSource.getHash()
		# The current hash is passed as default value for the 'parameter hash' option
		self._storedHash = config.get('parameter hash', pHash, persistent = True)
		if self._storedHash != pHash:
			needResync = True # Resync needed if parameters have changed
			self._log.info('Parameter hash has changed')
			self._log.debug('\told hash: %s', self._storedHash)
			self._log.debug('\tnew hash: %s', pHash)
			config.setState(True, 'init', detail = 'config')
		# Init takes precedence over resync
		doResync = (userResync or needResync) and not doInit

		if not doResync and not doInit: # Reuse old mapping
			activity = utils.ActivityLog('Loading cached parameter information')
			self._readJob2PID()
			activity.finish()
			return
		elif doResync: # Perform sync
			activity = utils.ActivityLog('Syncronizing parameter information')
			# The stored hash is cleared before calling resync
			self._storedHash = None
			self._resyncState = self.resync()
			activity.finish()
		elif doInit: # Write current state
			self._writeJob2PID(self._pathJob2PID)
			ParameterSource.getClass('GCDumpParameterSource').write(self._pathParams, self)
		# Only reached after init or resync: store the current hash of the raw source
		config.set('parameter hash', self._rawSource.getHash())
Ejemplo n.º 20
0
 def create_psrc(cls, pconfig, repository, ref_name, *args):  # pylint:disable=arguments-differ
     """Create the parameter source referenced by ref_name.

     The source type is read from the option 'type' of ref_name; it defaults to
     'data' if 'dataset:<ref_name>' is in the repository and to 'csv' otherwise.
     """
     if ('dataset:' + ref_name) in repository:
         fallback_type = 'data'
     else:
         fallback_type = 'csv'
     selected_type = pconfig.get(ref_name, 'type', fallback_type)
     return ParameterSource.create_psrc_safe(
         selected_type, pconfig, repository, ref_name, *args)
Ejemplo n.º 21
0
	def __new__(cls, *psources):
		"""Create a new instance only if more than one non-null source remains.

		Without remaining sources a NullParameterSource is returned; a single
		remaining source is returned directly.
		"""
		remaining = strip_null_sources(psources)
		if not remaining:
			return NullParameterSource()
		if len(remaining) == 1:
			return remaining[0]
		return ParameterSource.__new__(cls)
Ejemplo n.º 22
0
	def _useAvailableDataSource(self, source):
		"""Add the data parameter source if datasets are available but not used yet.

		Returns source unchanged unless DataParameterSource.datasetsAvailable is set
		and DataParameterSource.datasetsUsed is not; then the data source is returned,
		crossed with source if source is not None.
		"""
		data_cls = Plugin.getClass('DataParameterSource')
		if (not data_cls.datasetsAvailable) or data_cls.datasetsUsed:
			return source
		if source is None:
			return data_cls.create()
		return ParameterSource.createInstance('CrossParameterSource', data_cls.create(), source)
Ejemplo n.º 23
0
 def __new__(cls, *psources):
     """Create a new instance only if more than one non-null source remains.

     Without remaining sources a NullParameterSource is returned; a single
     remaining source is returned directly.
     """
     remaining = strip_null_sources(psources)
     if not remaining:
         return NullParameterSource()
     if len(remaining) == 1:
         return remaining[0]
     return ParameterSource.__new__(cls)
Ejemplo n.º 24
0
    def _getUserSource(self, pExpr):
        """Evaluate the expression pExpr with all parameter source names as functions.

        Every name (except 'depth') of every entry in ParameterSource.getClassList()
        is made available as a function that calls the create method of that class.
        If the evaluation fails, the available function names are logged as
        warning and the exception is re-raised.
        """
        def createWrapper(clsName):
            # binds clsName for each wrapper separately
            def wrapper(*args):
                parameterClass = ParameterSource.getClass(clsName)
                try:
                    created = parameterClass.create(
                        self._paramConfig, self._repository, *args)
                except Exception:
                    raise ParameterError(
                        'Error while creating %r with arguments %r' %
                        (parameterClass.__name__, args))
                return created

            return wrapper

        available = {}
        for clsInfo in ParameterSource.getClassList():
            for alias in clsInfo.keys():
                if alias != 'depth':
                    available[alias] = createWrapper(alias)
        try:
            # the expression is evaluated with a copy of the function table as globals
            return eval(pExpr, dict(available))  # pylint:disable=eval-used
        except Exception:
            self._log.warning('Available functions: %s',
                              sorted(available.keys()))
            raise
Ejemplo n.º 25
0
	def resync(self):
		"""Resync the dataset partitions with the current dataset information.

		Starts from the result of ParameterSource.resync. If resync is enabled and a
		data provider is set, the stored partitions are resynced against the new
		dataset blocks and the reported job changes are merged into the result.
		Returns the tuple (redo, disable, size change).
		"""
		(result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
		if self.resyncEnabled() and self._dataProvider:
			# Get old and new dataset information
			old = DataProvider.loadFromFile(self.getDataPath('cache.dat')).getBlocks()
			self._dataProvider.clearCache()
			new = self._dataProvider.getBlocks()
			self._dataProvider.saveToFile(self.getDataPath('cache-new.dat'), new)

			# Use old splitting information to synchronize with new dataset infos
			jobChanges = self._dataSplitter.resyncMapping(self.getDataPath('map-new.tar'), old, new)
			if jobChanges:
				# Move current splitting to backup and use the new splitting from now on
				# (the parameters old / new shadow the block lists from above inside this helper)
				def backupRename(old, cur, new):
					# without self._keepOld the current file is simply replaced by the new one
					if self._keepOld:
						os.rename(self.getDataPath(cur), self.getDataPath(old))
					os.rename(self.getDataPath(new), self.getDataPath(cur))
				# time.time() is called for each backup name separately - the timestamps may differ
				backupRename(  'map-old-%d.tar' % time.time(),   'map.tar',   'map-new.tar')
				backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
				# Reload the partitions to detect a change of the number of jobs
				old_maxN = self._dataSplitter.getMaxJobs()
				self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
				self._maxN = self._dataSplitter.getMaxJobs()
				result_redo.update(jobChanges[0])
				result_disable.update(jobChanges[1])
				result_sizeChange = result_sizeChange or (old_maxN != self._maxN)
			self.resyncFinished()
		return (result_redo, result_disable, result_sizeChange)
Ejemplo n.º 26
0
	def resync(self): # Quicker version than the general purpose implementation
		"""Resync all child sources and report whether the parameter count changed.

		The redo / disable entries come from combining the base class result with
		the results of all sources in self._psourceList.
		"""
		combined = ParameterSource.resync(self)
		for child in self._psourceList:
			combined = combineSyncResult(combined, child.resync())
		# recompute the number of parameters to detect size changes
		previous_max = self._maxParameters
		self._maxParameters = self.initMaxParameters()
		size_changed = previous_max != self._maxParameters
		return (combined[0], combined[1], size_changed)
Ejemplo n.º 27
0
	def resync_psrc(self):  # Quicker version than the general purpose implementation
		"""Resync all child sources and report whether the parameter length changed.

		The redo / disable entries come from combining the empty resync result with
		the results of all sources in self._psrc_list.
		"""
		combined = ParameterSource.get_empty_resync_result()
		for child in self._psrc_list:
			combined = _combine_resync_result(combined, child.resync_psrc())
		# refresh the cached lengths to detect size changes
		previous_max = self._psrc_max
		self._psrc_max_list = [child.get_parameter_len() for child in self._psrc_list]
		self._psrc_max = self._init_psrc_max()
		size_changed = previous_max != self._psrc_max
		return (combined[0], combined[1], size_changed)
Ejemplo n.º 28
0
 def _useAvailableDataSource(self, source):
     """Cross source with every unused 'dataset:' entry of the repository.

     An entry counts as used if it is contained in source.getUsedSources(),
     which is queried once before the loop.
     """
     already_used = source.getUsedSources()
     combined = source
     for (entry_name, candidate) in self._repository.items():
         if not entry_name.startswith('dataset:'):
             continue
         if candidate in already_used:
             continue
         combined = ParameterSource.createInstance(
             'CrossParameterSource', candidate, combined)
     return combined
Ejemplo n.º 29
0
	def _createPSpace(self, args):
		"""Create a subspace parameter source from a subspace reference.

		args with one entry is used as create(config, args[0]); args with three
		entries is used as create(config, args[2], args[0]).
		Raises APIError for any other number of entries.
		"""
		SubSpaceParameterSource = ParameterSource.getClass('SubSpaceParameterSource')
		if len(args) == 1:
			return SubSpaceParameterSource.create(self._paramConfig, args[0])
		elif len(args) == 3:
			return SubSpaceParameterSource.create(self._paramConfig, args[2], args[0])
		else:
			# wrap args in a tuple - a tuple valued args would otherwise be unpacked by the
			# format operator and raise a TypeError instead of the intended APIError
			raise APIError('Invalid subspace reference!: %r' % (args,))
Ejemplo n.º 30
0
			def wrapper(*args):
				"""Look up the parameter source class clsName and call its create method.

				Raises ParameterError if the class lookup or the creation fails.
				"""
				try:
					psource_cls = ParameterSource.getClass(clsName)
				except Exception:
					raise ParameterError('Unable to create parameter source "%r"!' % clsName)
				try:
					created = psource_cls.create(self._paramConfig, *args)
				except Exception:
					raise ParameterError('Error while creating "%r" with arguments "%r"' % (psource_cls.__name__, args))
				return created
Ejemplo n.º 31
0
	def _createVarSource(self, var_list, lookup_list): # create variable source
		"""Create the cross source of all sources needed for the given variables.

		Entries from createLookupHelper that request elevation are not instantiated
		here but appended to self._nestedSources as (class, arguments) tuple.
		"""
		local_sources = []
		helper_entries = createLookupHelper(self._paramConfig, var_list, lookup_list)
		for (needs_elevation, psource_cls, psource_args) in helper_entries:
			if not needs_elevation:
				local_sources.append(psource_cls(*psource_args))
				continue
			# switch needs elevation beyond local scope
			self._nestedSources.append((psource_cls, psource_args))
		# Optimize away unnecessary cross operations
		return ParameterSource.createInstance('CrossParameterSource', *local_sources)
Ejemplo n.º 32
0
	def __new__(cls, *psrc_list):
		"""Create a new instance only if several sources remain and no repeat was given.

		A repeat value other than 1 yields a RepeatParameterSource around cls(*sources);
		no remaining source yields a NullParameterSource and a single remaining
		source is returned directly.
		"""
		(times, sources) = _separate_repeat(_strip_null_sources(psrc_list))
		if times != 1:
			return RepeatParameterSource(cls(*sources), times)
		if not sources:
			return NullParameterSource()
		if len(sources) == 1:
			return sources[0]
		return ParameterSource.__new__(cls)
Ejemplo n.º 33
0
	def resync(self, force=False):
		"""Return the pending resync result and reset the stored one to the empty result.

		A new resync is only performed if no result is pending and the hash of the
		source changed, the source requests a resync or force is set.
		Raises ParameterError if _resync fails.
		"""
		current_hash = self._psrc.get_psrc_hash()
		resync_wanted = (current_hash != self._psrc_hash) or self._psrc.get_resync_request() or force
		# Do not overwrite resync results - eg. from external or init trigger
		nothing_pending = (self._resync_state == ParameterSource.get_empty_resync_result())
		if nothing_pending and resync_wanted:
			activity = Activity('Syncronizing parameter information')
			started = time.time()
			try:
				self._resync_state = self._resync()
			except Exception:
				raise ParameterError('Unable to resync parameters!')
			# the hash is read again after the resync
			self._psrc_hash = self._psrc.get_psrc_hash()
			activity.finish()
			elapsed = time.time() - started
			self._log.log(logging.INFO, 'Finished resync of parameter source (%s)',
				str_time_short(elapsed))
		pending_result = self._resync_state
		self._resync_state = ParameterSource.get_empty_resync_result()
		return pending_result
Ejemplo n.º 34
0
 def __new__(cls, *psrc_list):
     """Create a new instance only if several sources remain and no repeat was given.

     A repeat value other than 1 yields a RepeatParameterSource around
     cls(*sources); no remaining source yields a NullParameterSource and a
     single remaining source is returned directly.
     """
     (times, sources) = _separate_repeat(_strip_null_sources(psrc_list))
     if times != 1:
         return RepeatParameterSource(cls(*sources), times)
     if not sources:
         return NullParameterSource()
     if len(sources) == 1:
         return sources[0]
     return ParameterSource.__new__(cls)
Ejemplo n.º 35
0
	def __init__(self, fn):
		"""Read a parameter dump from fn into self._output_vn_list and self._values.

		The first line (without leading '#') holds the JSON encoded list of output
		names. Every other line is parsed into (has '!' marker, parameter number,
		list of values); lines starting with '#' result in None entries.
		"""
		ParameterSource.__init__(self)
		fp = GZipTextFile(fn, 'r')
		try:
			first_line = fp.readline().lstrip('#').strip()
			self._output_vn_list = []
			if first_line:
				self._output_vn_list = parse_json(first_line)

			def _parse_line(line):
				if line.startswith('#'):
					return None
				(pnum_field, value_field) = line.split('\t', 1)
				marked = '!' in pnum_field
				# the '!' marker is blanked out - int() ignores the surrounding whitespace
				pnum = int(pnum_field.replace('!', ' '))
				values = lmap(parse_json, value_field.strip().split('\t'))
				return (marked, pnum, values)

			self._values = [_parse_line(line) for line in fp.readlines()]
		finally:
			fp.close()
Ejemplo n.º 36
0
 def create_psrc(cls, pconfig, repository, output_vn, lookup_vn_list=None):  # pylint:disable=arguments-differ
     """Create the parameter source matching the configured value of output_vn.

     The value is read via pconfig.get_parameter (name without leading '!').
     Lists become const / simple parameter sources, tuples that do not start
     with a dict are used as create_instance arguments, everything else
     becomes an auto lookup source.
     """
     value = pconfig.get_parameter(output_vn.lstrip('!'))
     if isinstance(value, list):
         # Simplify single value parameters to const parameters
         if len(value) == 1:
             return ParameterSource.create_instance(
                 'ConstParameterSource', output_vn, value[0])
         return ParameterSource.create_instance(
             'SimpleParameterSource', output_vn, value)
     if isinstance(value, tuple) and not isinstance(value[0], dict):
         return ParameterSource.create_instance(*value)
     return ParameterSource.create_instance(
         'InternalAutoLookupParameterSource', pconfig, output_vn,
         lookup_vn_list)
Ejemplo n.º 37
0
    def __init__(self, fn):
        """Read a parameter dump from fn into self._keys and self._values.

        The first line (without leading '#') holds the JSON encoded key list.
        Every other line is parsed into (has '!' marker, parameter number,
        list of values); lines starting with '#' result in None entries.
        """
        ParameterSource.__init__(self)
        fp = ZipFile(fn, 'r')
        try:
            header = fp.readline().lstrip('#').strip()
            self._keys = []
            if header:
                self._keys = parseJSON(header)

            def parseLine(line):
                if line.startswith('#'):
                    return None
                (pNumStr, stored) = lmap(str.strip, line.split('\t', 1))
                is_marked = '!' in pNumStr
                pNum = int(pNumStr.rstrip('!'))
                return (is_marked, pNum, lmap(parseJSON, stored.split('\t')))

            self._values = [parseLine(line) for line in fp.readlines()]
        finally:
            fp.close()
Ejemplo n.º 38
0
	def _register_psrc(self, pconfig, output_vn):
		# Append an 'InternalAutoParameterSource' for output_vn to self._psrc_list.
		# The lookup variable names come from the 'lookup' setting of output_vn
		# (default: empty string); every non-alphanumeric character acts as separator.
		def _blank_unless_alnum(char):
			if not str.isalnum(char):
				return ' '
			return char
		raw_lookup = pconfig.get(output_vn, 'lookup', '')
		# Turn all separators into blanks so a plain whitespace split yields the names
		sanitized = str.join('', imap(_blank_unless_alnum, raw_lookup))
		lookup_vn_list = lidfilter(sanitized.split())
		self._psrc_list.append(ParameterSource.create_psrc_safe(
			'InternalAutoParameterSource', pconfig, {}, output_vn, lookup_vn_list))
Ejemplo n.º 39
0
 def wrapper(*args):
     # Create an instance of the parameter class registered under clsName,
     # passing the parameter config and repository of the enclosing object
     # in front of the user supplied arguments.
     psrc_cls = ParameterSource.getClass(clsName)
     try:
         instance = psrc_cls.create(self._paramConfig, self._repository, *args)
     except Exception:
         # Report which class and arguments caused the failure
         message = 'Error while creating %r with arguments %r' % (psrc_cls.__name__, args)
         raise ParameterError(message)
     return instance
Ejemplo n.º 40
0
 def resync_psrc(self):  # Quicker version than the general purpose implementation
     """Resync all sources in ``self._psrc_list`` and refresh the cached sizes.

     Returns a tuple ``(redo, disable, size_changed)``: the first two entries
     are taken from the combined resync results of the child sources, the
     last one tells whether ``self._psrc_max`` differs from its old value.
     """
     combined = ParameterSource.get_empty_resync_result()
     for child in self._psrc_list:
         combined = _combine_resync_result(combined, child.resync_psrc())

     previous_max = self._psrc_max

     def _get_len(psrc):
         return psrc.get_parameter_len()
     # Child lengths have to be refreshed before the maximum is recomputed
     self._psrc_max_list = lmap(_get_len, self._psrc_list)
     self._psrc_max = self._init_psrc_max()

     redo = combined[0]
     disable = combined[1]
     size_changed = previous_max != self._psrc_max
     return (redo, disable, size_changed)
Ejemplo n.º 41
0
 def __new__(cls, psrc, times):  # pylint:disable=arguments-differ
     """Select the object to hand out for the given *psrc* / *times* pair.

     * negative *times*: *psrc* itself is returned
     * *psrc* reports a parameter length of None: a TruncateParameterSource
       built from (psrc, times) is returned
     * *times* == 0: a NullParameterSource is returned
     * *times* == 1: *psrc* itself is returned
     * otherwise: a new, uninitialised instance of *cls* is returned

     NOTE(review): *times* is presumably a repetition count - confirm against
     the owning class.
     """
     if times < 0:
         return psrc
     # The length check comes before the 0 / 1 shortcuts
     if psrc.get_parameter_len() is None:
         return TruncateParameterSource(psrc, times)
     if times == 0:
         return NullParameterSource()
     if times == 1:
         return psrc
     return ParameterSource.__new__(cls)
Ejemplo n.º 42
0
	def __new__(cls, psrc, times):  # pylint:disable=arguments-differ
		# Decide which object represents the (psrc, times) combination:
		#  times < 0                     -> psrc itself
		#  psrc has no parameter length  -> TruncateParameterSource(psrc, times)
		#  times == 0                    -> NullParameterSource()
		#  times == 1                    -> psrc itself
		#  everything else               -> fresh instance of cls
		if times < 0:
			return psrc
		if psrc.get_parameter_len() is None:
			return TruncateParameterSource(psrc, times)
		if times == 0:
			return NullParameterSource()
		if times != 1:
			return ParameterSource.__new__(cls)
		return psrc
Ejemplo n.º 43
0
	def resync_psrc(self):
		# Resync the wrapped source and translate its redo / disable parameter
		# numbers into the numbering of this source via the rebuilt self._psp_field.
		# Returns (redo set, disable set, size change flag of the wrapped source).
		(redo_set, disable_set, _) = ParameterSource.get_empty_resync_result()
		(child_redo, child_disable, child_size_change) = self._psrc.resync_psrc()
		# The field must be rebuilt after the wrapped source was resynced
		self._psp_field = self._init_psp_field()
		for (pnum, (child_pnum, _output_idx)) in enumerate(self._psp_field):  # output index is not needed
			if child_pnum in child_redo:
				redo_set.add(pnum)
			if child_pnum in child_disable:
				disable_set.add(pnum)
		return (redo_set, disable_set, child_size_change)
Ejemplo n.º 44
0
	def resync(self):
		# Resync the wrapped source and report the affected parameter numbers
		# relative to self._posStart. Numbers outside [self._posStart, self._posEnd]
		# are dropped. The size change flag is set by the base class result or
		# by a changed self._posEnd.
		(result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
		(psource_redo, psource_disable, _) = self._psource.resync() # size change is irrelevant if outside of range

		def _collect_in_range(pNumList, target):
			# The range uses the old end position - it is only updated below
			for pNum in pNumList:
				if self._posStart <= pNum <= self._posEnd:
					target.add(pNum - self._posStart)
		_collect_in_range(psource_redo, result_redo)
		_collect_in_range(psource_disable, result_disable)

		previousPosEnd = self._posEnd
		# Without a user supplied end, follow the last parameter of the wrapped source
		self._posEnd = utils.QM(self._posEndUser is None, self._psource.getMaxParameters() - 1, self._posEndUser)
		posEndChanged = (previousPosEnd != self._posEnd)
		return (result_redo, result_disable, result_sizeChange or posEndChanged)
Ejemplo n.º 45
0
	def resync(self):
		# Resync this source and - if resync is enabled - the wrapped source.
		# Redo / disable parameter numbers of the wrapped source are translated
		# into the numbering of this source via the rebuilt self._pSpace.
		# Returns (redo set, disable set, size change flag).
		(result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
		# Must be bound even when resync is disabled: the return statement below
		# reads it unconditionally (previously this raised UnboundLocalError)
		psource_sizeChange = False
		if self.resyncEnabled():
			(psource_redo, psource_disable, psource_sizeChange) = self._psource.resync()
			# The parameter space has to be rebuilt after the wrapped source was resynced
			self._pSpace = self.initPSpace()
			for pNum, pInfo in enumerate(self._pSpace):
				subNum, _ = pInfo # ignore lookupIndex
				if subNum in psource_redo:
					result_redo.add(pNum)
				if subNum in psource_disable:
					result_disable.add(pNum)
			self.resyncFinished()
		return (result_redo, result_disable, result_sizeChange or psource_sizeChange)