def getOutputDatasetsWithPSet(payloadNode, sorted=False):
    """
    _getOutputDatasetsWithPSet_

    Extract all the information about output datasets from the
    payloadNode object provided, including the {{}} format PSet cfg.

    Each entry is a copy of the node's dataset info augmented with the
    Application details from the node and, when available, the config
    content and conditions tag from the node's cfgInterface.

    Returns a list of DatasetInfo objects including App details
    from the node.  If sorted is True, the result is run through
    _sortDatasets, matching getInputDatasets/getOutputDatasets.
    """
    result = []
    for item in payloadNode._OutputDatasets:
        resultEntry = DatasetInfo()
        resultEntry.update(item)
        resultEntry["ApplicationName"] = payloadNode.application['Executable']
        resultEntry["ApplicationProject"] = payloadNode.application['Project']
        resultEntry["ApplicationVersion"] = payloadNode.application['Version']
        resultEntry["ApplicationFamily"] = item.get("OutputModuleName",
                                                    "AppFamily")
        try:
            config = payloadNode.cfgInterface
            resultEntry['PSetContent'] = config.originalContent()
            resultEntry['Conditions'] = config.conditionsTag
        except Exception:
            # Best effort: a node without a usable cfgInterface still
            # yields a dataset entry, just without PSet details.
            # NOTE: 'Conditions' is intentionally left unset in this case.
            resultEntry['PSetContent'] = None
        result.append(resultEntry)
    if sorted:
        result = _sortDatasets(result)
    # Bug fix: the result list was built but never returned, and the
    # sorted flag was silently ignored.
    return result
def expandDatasetInfo(datasetInfo, requestTimestamp):
    """
    _expandDatasetInfo_

    Expand a DatasetInfo containing a multi tier DataTier into one
    DatasetInfo per basic tier.  A single tier dataset is handed back
    unchanged as the sole entry of the list.

    returns a list of datasetInfo objects
    """
    tierValue = datasetInfo['DataTier']
    if not isMultiTier(tierValue):
        # Single tier: return the original object as the only entry.
        return [datasetInfo]

    # All expanded tiers share the same processed dataset name, built
    # from the app version, output module and the request timestamp.
    processedDSName = "%s-%s-%s" % (datasetInfo['ApplicationVersion'],
                                    datasetInfo['OutputModuleName'],
                                    requestTimestamp)
    expanded = []
    for singleTier in splitMultiTier(tierValue):
        tierInfo = DatasetInfo()
        tierInfo.update(datasetInfo)
        tierInfo['DataTier'] = singleTier
        tierInfo['ProcessedDataset'] = processedDSName
        expanded.append(tierInfo)
    return expanded
def __call__(self, node):
    """
    _operator(node)_

    Operate on all output datasets in a Payload Node: for every
    mergeable dataset, build its merged counterpart (merge app details,
    "-merged"/"-unmerged" processed dataset name handling, parentage,
    "<module>-Merged" output module name) and append it to self.result.
    """
    for dataset in node._OutputDatasets:
        # dict.has_key() was removed in Python 3; the `in` operator is
        # equivalent and works on Python 2 as well.
        if "NoMerge" in dataset:
            # //
            # // If we need to avoid merging some datasets we
            #//  can add a NoMerge key and this will ignore it
            continue
        newDataset = DatasetInfo()
        newDataset.update(dataset)
        newDataset["ApplicationFamily"] = self.mergeModuleName
        newDataset["ApplicationName"] = self.appName
        newDataset["ApplicationVersion"] = node.application['Version']
        procName = dataset["ProcessedDataset"]
        if procName.endswith("-unmerged"):
            # NOTE(review): replace() drops every "-unmerged" occurrence,
            # not just the suffix; preserved as existing behavior.
            procName = procName.replace("-unmerged", "")
        else:
            procName = "%s-merged" % procName
        newDataset["ProcessedDataset"] = procName
        newDataset["ParentDataset"] = dataset.name()
        newDataset['OutputModuleName'] = "%s-Merged" % (
            newDataset['OutputModuleName'],)
        if node.userSandbox is not None:
            newDataset['UserSandbox'] = node.userSandbox
        self.result.append(newDataset)
    return
def newDataset(self):
    """
    _newDataset_

    Create an empty DatasetInfo, attach it to this file's dataset list
    and hand it back so the caller can populate it.
    """
    entry = DatasetInfo()
    self.dataset.append(entry)
    return entry
def addPileupDataset(self, primary, tier, processed):
    """
    _addPileupDataset_

    Register a pileup dataset on this node from its primary dataset,
    data tier and processed dataset names.

    Returns the new DatasetInfo entry by reference.
    """
    pileupEntry = DatasetInfo()
    pileupEntry['PrimaryDataset'] = primary
    pileupEntry['DataTier'] = tier
    pileupEntry['ProcessedDataset'] = processed
    self._PileupDatasets.append(pileupEntry)
    return pileupEntry
def getPileupDatasets(payloadNode):
    """
    _getPileupDatasets_

    Copy every pileup dataset registered on the node provided, tagging
    each copy with the node's name under 'NodeName'.

    Returns a list of dataset info objects
    """
    entries = []
    for pileup in payloadNode._PileupDatasets:
        entry = DatasetInfo()
        entry.update(pileup)
        entry['NodeName'] = payloadNode.name
        entries.append(entry)
    return entries
def addOutputDataset(self, primaryDS, processedDS, outputModuleName):
    """
    _addOutputDataset_

    Add a new Output Dataset, specifying the Primary and Processed
    Dataset names and the name of the output module in the PSet
    responsible for writing out files for that dataset.

    Returns the new DatasetInfo entry by reference.
    """
    outputEntry = DatasetInfo()
    outputEntry['PrimaryDataset'] = primaryDS
    outputEntry['ProcessedDataset'] = processedDS
    outputEntry['OutputModuleName'] = outputModuleName
    self._OutputDatasets.append(outputEntry)
    return outputEntry
def getInputDatasets(payloadNode, sorted=False):
    """
    _getInputDatasets_

    Copy all the input dataset information held by the payloadNode
    object provided.

    Returns a list of DatasetInfo objects; when sorted is True the
    list is run through _sortDatasets first.
    """
    copies = []
    for inputDS in payloadNode._InputDatasets:
        entry = DatasetInfo()
        entry.update(inputDS)
        copies.append(entry)
    if sorted:
        return _sortDatasets(copies)
    return copies
def addInputDataset(self, primaryDS, processedDS):
    """
    _addInputDataset_

    Add a new Input Dataset to this Node.  Arguments should be:

    - *primaryDS* : The Primary Dataset name of the input dataset
    - *processedDS* : The Processed Dataset name of the input dataset

    The DatasetInfo object is returned by reference so the caller can
    add further information to it.

    InputModuleName should be the mainInputSource of the PSet for the
    main input dataset.  At present this is set elsewhere.
    """
    inputEntry = DatasetInfo()
    inputEntry['PrimaryDataset'] = primaryDS
    inputEntry['ProcessedDataset'] = processedDS
    self._InputDatasets.append(inputEntry)
    return inputEntry
def getOutputDatasets(payloadNode, sorted=False):
    """
    _getOutputDatasets_

    Copy all the output dataset information from the payloadNode
    object provided, augmenting each entry with the Application
    details (Executable, Project, Version) from the node.

    Returns a list of DatasetInfo objects; when sorted is True the
    list is run through _sortDatasets first.
    """
    appDetails = payloadNode.application
    copies = []
    for outputDS in payloadNode._OutputDatasets:
        entry = DatasetInfo()
        entry.update(outputDS)
        entry["ApplicationName"] = appDetails['Executable']
        entry["ApplicationProject"] = appDetails['Project']
        entry["ApplicationVersion"] = appDetails['Version']
        copies.append(entry)
    if sorted:
        return _sortDatasets(copies)
    return copies
def unpackPayloadNodeData(self, improvNode):
    """
    _unpackPayloadNodeData_

    Unpack PayloadNode data from improv Node provided and
    add information to self.

    Reads, in order: the node Name/Type/Workflow attributes, the
    Application and ApplicationControl fields, ScriptControls lists,
    Input/Output/Pileup datasets, InputLinks, the base64-encoded
    Configuration payload, the embedded CMSSWConfig and the optional
    UserSandbox.  All queries are keyed on the concrete class name so
    subclasses unpack their own element names.
    """
    self.name = str(improvNode.attrs["Name"])
    self.type = str(improvNode.attrs["Type"])
    # Workflow attribute is optional; only set when present.
    workflowName = improvNode.attrs.get('Workflow', None)
    if workflowName != None:
        self.workflow = str(workflowName)
    #  //
    # // Unpack data for this instance
    #//  App details
    appDataQ = IMProvQuery("/%s/Application" % self.__class__.__name__)
    appData = appDataQ(improvNode)[0]
    for appField in appData.children:
        field = str(appField.name)
        value = str(appField.attrs['Value'])
        self.application[field] = value
    #  //
    # // App Control details
    #//
    appConDataQ = IMProvQuery("/%s/ApplicationControl/*" %
                              self.__class__.__name__)
    appConData = appConDataQ(improvNode)
    for appConField in appConData:
        field = str(appConField.name)
        value = str(appConField.attrs['Value'])
        self.applicationControls[field] = value
    #  //
    # // Script Controls
    #//
    scriptConQ = IMProvQuery("/%s/ScriptControls/ScriptList" %
                             self.__class__.__name__)
    scriptLists = scriptConQ(improvNode)
    for scriptList in scriptLists:
        # Unnamed script lists are skipped entirely.
        listName = scriptList.attrs.get("Name", None)
        if listName == None:
            continue
        listName = str(listName)
        for script in scriptList.children:
            # Entries without a Value attribute are ignored.
            scriptName = script.attrs.get("Value", None)
            if scriptName == None:
                continue
            self.scriptControls[listName].append(str(scriptName))
    #  //
    # // Dataset details
    #//  Input Datasets
    inputDSQ = IMProvQuery("/%s/InputDatasets/DatasetInfo" %
                           self.__class__.__name__)
    inputDS = inputDSQ(improvNode)
    # print improvNode
    for item in inputDS:
        newDS = DatasetInfo()
        newDS.load(item)
        self._InputDatasets.append(newDS)
    #  //
    # // Output Datasets
    #//
    outputDSQ = IMProvQuery("/%s/OutputDatasets/DatasetInfo" %
                            self.__class__.__name__)
    outputDS = outputDSQ(improvNode)
    for item in outputDS:
        newDS = DatasetInfo()
        newDS.load(item)
        self._OutputDatasets.append(newDS)
    #  //
    # // Pileup Datasets
    #//
    pileupDSQ = IMProvQuery("/%s/PileupDatasets/DatasetInfo" %
                            self.__class__.__name__)
    pileupDS = pileupDSQ(improvNode)
    for item in pileupDS:
        newDS = DatasetInfo()
        newDS.load(item)
        self._PileupDatasets.append(newDS)
    #  //
    # // Input Links
    #//
    inpLinkQ = IMProvQuery("/%s/InputLinks/InputLink" %
                           self.__class__.__name__)
    inpLinks = inpLinkQ(improvNode)
    for ilink in inpLinks:
        newLink = InputLink()
        newLink.load(ilink)
        self._InputLinks.append(newLink)
    #  //
    # // Configuration
    #//
    configQ = IMProvQuery("/%s/Configuration" % self.__class__.__name__)
    configNodes = configQ(improvNode)
    if len(configNodes) > 0:
        configNode = configNodes[0]
        # NOTE(review): base64.decodestring is Python 2 API (deprecated,
        # removed in Python 3.9 in favour of decodebytes) — confirm the
        # target interpreter before porting.
        self.configuration = base64.decodestring(str(configNode.chardata))
    cfgIntQ = IMProvQuery("/%s/CMSSWConfig" % self.__class__.__name__)
    cfgNodes = cfgIntQ(improvNode)
    if len(cfgNodes) > 0:
        cfgNode = cfgNodes[0]
        self.cfgInterface = CMSSWConfig()
        self.cfgInterface.load(cfgNode)
    #  //
    # // User sandbox
    #//
    sandboxQ = IMProvQuery("/%s/UserSandbox" % self.__class__.__name__)
    sandboxNodes = sandboxQ(improvNode)
    if len(sandboxNodes) > 0:
        # Last matching node wins when several UserSandbox entries exist.
        sandboxNode = sandboxNodes[-1]
        self.userSandbox = str(sandboxNode.chardata)
    return