Esempio n. 1
0
	def setupJobParameters(self, config, pm):
		"""Set up dataset based job parameters from the 'dataset' config section.

		Stores the dataset expression in self.dataset and returns early (leaving
		self.dataSplitter and self.dataRefresh as None) when it is empty.
		Otherwise a data provider and splitter are created, a DataParameterSource
		is registered under the name 'data', the resync interval is configured and
		a SIGUSR2 handler is installed that calls resyncSetup(force = True).

		The pm argument is not used by this method.
		Raises UserError if the splitter reports zero jobs.
		"""
		config = config.addSections(['dataset']).addTags([self])
		self.dataSplitter = None
		self.dataRefresh = None
		self.dataset = config.get('dataset', '').strip()
		if not self.dataset:
			return # no dataset given - nothing to set up
		config.set('se output pattern', '@NICK@_job_@MY_JOBID@_@X@', override = False)
		config.set('default lookup', 'DATASETNICK', override = False)

		# Provider and splitter are both selected by name from the config
		provider_name = config.get('dataset provider', 'ListProvider')
		provider = DataProvider.create(config, self.dataset, provider_name)
		splitter_name = config.get('dataset splitter', 'FileBoundarySplitter')
		self.dataSplitter = provider.checkSplitter(DataSplitter.getClass(splitter_name))(config)
		self.checkSE = config.getBool('dataset storage check', True, onChange = None)

		# Create the dataset parameter source and register it as 'data'
		work_path = config.getWorkPath()
		data_source = DataParameterSource(work_path, 'data',
			provider, self.dataSplitter, self.initDataProcessor())
		DataParameterSource.datasetsAvailable['data'] = data_source

		# Dataset refresh rate - a non-positive value results in interval = 0
		self.dataRefresh = config.getTime('dataset refresh', -1, onChange = None)
		refresh_enabled = self.dataRefresh > 0
		if refresh_enabled:
			# the refresh interval is never shorter than the provider's query limit
			resync_interval = max(self.dataRefresh, provider.queryLimit())
		else:
			resync_interval = 0
		data_source.resyncSetup(interval = resync_interval)
		if refresh_enabled:
			utils.vprint('Dataset source will be queried every %s' % utils.strTime(self.dataRefresh), -1)

		def force_resync(signum, stack_frame):
			# triggered from outside via SIGUSR2
			data_source.resyncSetup(force = True)
		signal.signal(signal.SIGUSR2, force_resync)

		if self.dataSplitter.getMaxJobs() == 0:
			raise UserError('There are no events to process')
	def tree2expr(self, node):
		"""Translate a parsed parameter expression tree into a list of sources.

		node may be:
		- a tuple (operator, args): 'lookup' and 'ref' are handled specially,
		  '*', '+' and ',' combine the recursively translated arguments with
		  self.combineSources;
		- an int, which is returned unchanged as [node];
		- any other leaf, which is passed to self._createVarSource as a
		  single variable without lookup.

		Raises APIError for an unknown reference type or operator token.
		NOTE(review): tree2names is taken from the enclosing scope, which is
		not visible here.
		"""
		if not isinstance(node, tuple):
			if isinstance(node, int):
				return [node]
			return self._createVarSource([node], None)

		(op, operands) = node
		if op == 'lookup':
			assert(len(operands) == 2)
			return self._createVarSource(tree2names(operands[0]), tree2names(operands[1]))

		if op == 'ref':
			assert(len(operands) == 1)
			ref_name = operands[0]
			# references default to 'dataset' only if a dataset with that name is registered
			if ref_name in DataParameterSource.datasetsAvailable:
				fallback_type = 'dataset'
			else:
				fallback_type = 'csv'
			ref_type = self._paramConfig.get(ref_name, 'type', fallback_type)
			if ref_type == 'dataset':
				return [DataParameterSource.create(self._paramConfig, ref_name)]
			if ref_type == 'csv':
				return [CSVParameterSource.create(self._paramConfig, ref_name)]
			raise APIError('Unknown reference type: "%s"' % ref_type)

		# remaining operators: the arguments are translated before the token is checked
		sources = lchain(imap(self.tree2expr, operands))
		combiner = {
			'*': CrossParameterSource,
			'+': ChainParameterSource,
			',': ZipLongParameterSource,
		}.get(op)
		if combiner is None:
			raise APIError('Unknown token: "%s"' % op)
		return self.combineSources(combiner, sources)
	def tree2expr(self, node):
		"""Translate a parsed parameter expression tree into a list of sources.

		node may be:
		- a tuple (operator, args):
		  'lookup' builds variable sources from the names found in args[0],
		  using the names found in args[1] as lookup variables;
		  'ref' creates a DataParameterSource or CSVParameterSource depending
		  on the 'type' option of the referenced name;
		  '*', '+' and ',' combine the recursively translated arguments via
		  self.combineSources;
		- an int, which is returned unchanged as [node];
		- any other leaf, which is treated as a single variable without lookup.

		Raises APIError for an unknown reference type or operator token.
		"""
		def tree2names(node): # return list of referenced variable names in tree
			if isinstance(node, tuple):
				result = []
				for op_args in node[1:]:
					for arg in op_args:
						result.extend(tree2names(arg))
				return result
			else:
				return [node]

		def createVarSource(var_list, lookup_list): # create variable source
			psource_list = []
			for (doElevate, PSourceClass, args) in createLookupHelper(self.paramConfig, var_list, lookup_list):
				if doElevate: # switch needs elevation beyond local scope
					self.elevatedSwitch.append((PSourceClass, args))
				else:
					psource_list.append(PSourceClass(*args))
			# Optimize away unnecessary cross operations
			# (a list is built explicitly: filter() returns an iterator without len() on Python 3)
			limited_list = [p for p in psource_list if p.getMaxParameters() is not None]
			if len(limited_list) > 1:
				return [CrossParameterSource(*psource_list)]
			return psource_list # simply forward list of psources

		if isinstance(node, tuple):
			(operator, args) = node
			if operator == 'lookup':
				assert(len(args) == 2)
				return createVarSource(tree2names(args[0]), tree2names(args[1]))
			elif operator == 'ref':
				assert(len(args) == 1)
				# references default to 'dataset' only if a dataset with that name is registered
				refTypeDefault = 'dataset'
				if args[0] not in DataParameterSource.datasetsAvailable:
					refTypeDefault = 'csv'
				refType = self.paramConfig.get(args[0], 'type', refTypeDefault)
				if refType == 'dataset':
					return [DataParameterSource.create(self.paramConfig, args[0])]
				elif refType == 'csv':
					return [CSVParameterSource.create(self.paramConfig, args[0])]
				raise APIError('Unknown reference type: "%s"' % refType)
			else:
				# flatten the source lists of all arguments before checking the token
				args_complete = []
				for expr in args:
					args_complete.extend(self.tree2expr(expr))
				if operator == '*':
					return self.combineSources(CrossParameterSource, args_complete)
				elif operator == '+':
					return self.combineSources(ChainParameterSource, args_complete)
				elif operator == ',':
					return self.combineSources(ZipLongParameterSource, args_complete)
				raise APIError('Unknown token: "%s"' % operator)
		elif isinstance(node, int):
			return [node]
		else:
			return createVarSource([node], None)
	def _getUserSource(self, pExpr, parent):
		"""Parse the parameter expression pExpr and return one combined source.

		The expression is tokenized using the keys of self.precedence and the
		characters '()[]<>', turned into a tree and translated by
		self.tree2expr. A dataset source is put in front if datasets are
		available but not yet used, and parent (if truthy) is appended. The
		resulting list is combined into a single source, which is finally
		wrapped by every (class, args) entry of self.elevatedSwitch.
		"""
		delimiters = lchain([self.precedence.keys(), list('()[]<>')])
		raw_tokens = tokenize(pExpr, delimiters)
		inline_tokens = list(tok2inlinetok(raw_tokens, list(self.precedence.keys())))
		utils.vprint('Parsing parameter string: "%s"' % ' '.join(imap(str, inline_tokens)), 0)
		expr_tree = tok2tree(inline_tokens, self.precedence)

		psources = self.tree2expr(expr_tree)
		if DataParameterSource.datasetsAvailable and not DataParameterSource.datasetsUsed:
			psources.insert(0, DataParameterSource.create())
		if parent:
			psources.append(parent)

		# cross is only needed for more than one source with a parameter limit - zip more efficient
		limited = lfilter(lambda p: p.getMaxParameters() is not None, psources)
		combiner = CrossParameterSource if len(limited) > 1 else ZipLongParameterSource
		combined = self.combineSources(combiner, psources)
		assert(len(combined) == 1)

		result = combined[0]
		for (wrapper_class, wrapper_args) in self.elevatedSwitch:
			result = wrapper_class(result, *wrapper_args)
		utils.vprint('Parsing output: %r' % result, 0)
		return result
	def getSource(self, config):
		"""Build the parameter source and hand it to the configured adapter.

		The raw source is obtained from self._getRawSource with a fresh
		RNGParameterSource. If datasets are available but not yet used, it is
		crossed with a dataset source. The result is passed to
		ParameterAdapter.getInstance together with self.adapter and config.
		"""
		raw_source = self._getRawSource(RNGParameterSource())
		dataset_missing = DataParameterSource.datasetsAvailable and not DataParameterSource.datasetsUsed
		if dataset_missing:
			raw_source = CrossParameterSource(DataParameterSource.create(), raw_source)
		return ParameterAdapter.getInstance(self.adapter, config, raw_source)