예제 #1
0
    def get_pick_dataset(self, verbose=False):
        
        job = self._getParent()
        if job and job.inputdata and job.inputdata.pick_event_list.name != '' and  len(job.inputdata.dataset) !=0 :
            raise ApplicationConfigurationError('Cannot use event pick list and input dataset at the same time.')

        #parametr check for event picking
        if job and job.inputdata and job.inputdata.pick_event_list.name != '':
            if job.inputdata.pick_data_type == '':
                raise ApplicationConfigurationError('Event pick data type (pick_data_type) must be specified.')

        # set X509_USER_PROXY
        from pandatools import Client
        if 'X509_USER_PROXY' not in os.environ or os.environ['X509_USER_PROXY'] == '':
            os.environ['X509_USER_PROXY'] = Client._x509()

        # setup eventLookup
        from pandatools.eventLookupClient import eventLookupClient
        elssiIF = eventLookupClient()
        # open run/event txt
        runEvtList = []
        if os.path.exists( self.pick_event_list.name ):
            logger.info("Event pick list file %s selected" % self.pick_event_list.name)
            runevttxt = open(self.pick_event_list.name)
            for line in runevttxt:
                items = line.split()
                if len(items) != 2:
                    continue
                runNr,evtNr = items
                runEvtList.append([runNr,evtNr])
                # close
            runevttxt.close()

        else:
            raise ApplicationConfigurationError('Could not read event pick list file %s.' %self.pick_event_list.name)

        # convert self.pick_data_type to Athena stream ref
        if self.pick_data_type == 'AOD':
            streamRef = 'StreamAOD_ref'
        elif self.pick_data_type == 'ESD':
            streamRef = 'StreamESD_ref'
        elif self.pick_data_type == 'RAW':
            streamRef = 'StreamRAW_ref'
        else:
            errStr  = 'invalid data type %s for event picking. ' % self.pick_data_type
            errStr += ' Must be one of AOD,ESD,RAW'
            raise ApplicationConfigurationError(errStr)
        logger.info('Getting dataset names and LFNs from ELSSI for event picking')

        # read
        guids = []
        guidRunEvtMap = {}
        runEvtGuidMap = {}
        # bulk lookup
        nEventsPerLoop = 500
        iEventsTotal = 0
        while iEventsTotal < len(runEvtList):
            tmpRunEvtList = runEvtList[iEventsTotal:iEventsTotal+nEventsPerLoop]
            iEventsTotal += nEventsPerLoop
            paramStr = 'Run, Evt: %s, Stream: %s, Dataset pattern: %s' % (tmpRunEvtList,self.pick_stream_name, self.pick_dataset_pattern)
            logger.debug(paramStr)
            logger.info('.')
            # check with ELSSI
            if self.pick_stream_name == '':
                guidListELSSI = elssiIF.doLookup(tmpRunEvtList,tokens=streamRef,amitag=self.event_pick_amitag,extract=True)
            else:
                guidListELSSI = elssiIF.doLookup(tmpRunEvtList,stream=self.pick_stream_name,tokens=streamRef,amitag=self.event_pick_amitag,extract=True)

            if len(guidListELSSI) == 0 or guidListELSSI == None:
                errStr = ''
                for tmpLine in elssiIF.output:
                    errStr += tmpLine + '\n'
                errStr = "GUID was not found in ELSSI.\n" + errStr
                raise ApplicationConfigurationError(errStr)

            # check attribute
            attrNames, attrVals = guidListELSSI
            def getAttributeIndex(attr):
                for tmpIdx,tmpAttrName in enumerate(attrNames):
                    if tmpAttrName.strip() == attr:
                        return tmpIdx
                errStr = "cannot find attribute=%s in %s provided by ELSSI" % (attr,str(attrNames))
                raise ApplicationConfigurationError(errStr)
            # get index
            indexEvt = getAttributeIndex('EventNumber')
            indexRun = getAttributeIndex('RunNumber')
            indexTag = getAttributeIndex(streamRef)
            # check events
            for runNr,evtNr in tmpRunEvtList:
                paramStr = 'Run:%s Evt:%s Stream:%s' % (runNr,evtNr,self.pick_stream_name)
                # collect GUIDs
                tmpguids = []
                for attrVal in attrVals:
                    if runNr == attrVal[indexRun] and evtNr == attrVal[indexEvt]:
                        tmpGuid = attrVal[indexTag]
                        # check non existing
                        if tmpGuid == 'NOATTRIB':
                            continue
                        if not tmpGuid in tmpguids:
                            tmpguids.append(tmpGuid)
                # not found
                if tmpguids == []:
                    errStr = "no GUIDs were found in ELSSI for %s" % paramStr
                    raise ApplicationConfigurationError(errStr)
                # append
                for tmpguid in tmpguids:
                    if not tmpguid in guids:
                        guids.append(tmpguid)
                        guidRunEvtMap[tmpguid] = []
                    guidRunEvtMap[tmpguid].append((runNr,evtNr))
                runEvtGuidMap[(runNr,evtNr)] = tmpguids

        # convert to dataset names and LFNs
        dsLFNs,allDSMap = Client.listDatasetsByGUIDs(guids,self.pick_dataset_pattern,verbose)
        logger.debug(dsLFNs)
        
        #populate DQ2Datase    
        if job and job.inputdata:
            job.inputdata.dataset = []
            job.inputdata.names = []
            job.inputdata.guids = []

            # check duplication
            for runNr,evtNr in runEvtGuidMap.keys():
                tmpLFNs = []
                tmpAllDSs = {}
                for tmpguid in runEvtGuidMap[(runNr,evtNr)]:
                    if tmpguid in dsLFNs:
                        tmpLFNs.append(dsLFNs[tmpguid])
                        job.inputdata.guids.append(tmpguid)
                        job.inputdata.names.append(dsLFNs[tmpguid][1])
                        if not ((dsLFNs[tmpguid][0]) in job.inputdata.dataset):
                            job.inputdata.dataset.append(dsLFNs[tmpguid][0])
                    else:
                        tmpAllDSs[tmpguid] = allDSMap[tmpguid]
                        if tmpguid in guidRunEvtMap:
                            del guidRunEvtMap[tmpguid]
                # empty        
                if tmpLFNs == []:
                    paramStr = 'Run:%s Evt:%s Stream:%s' % (runNr,evtNr,self.pick_stream_name)                        
                    errStr = "Dataset pattern '%s' didn't pick up a file for %s\n" % (self.pick_dataset_pattern,paramStr)
                    for tmpguid,tmpAllDS in tmpAllDSs.iteritems():
                        errStr += "    GUID:%s dataset:%s\n" % (tmpguid,str(tmpAllDS))
                    raise ApplicationConfigurationError(errStr)
                # duplicated    
                if len(tmpLFNs) != 1:
                    paramStr = 'Run:%s Evt:%s Stream:%s' % (runNr,evtNr,self.pick_stream_name)            
                    errStr = "Multiple LFNs %s were found in ELSSI for %s. Please set pick_dataset_pattern and/or pick_stream_name and or event_pick_amitag correctly." %(str(tmpLFNs),paramStr)
                    raise ApplicationConfigurationError(errStr)

        
        # return
        return guidRunEvtMap
예제 #2
0
    def get_pick_dataset(self, verbose=False):

        job = self._getParent()
        if job and job.inputdata and job.inputdata.pick_event_list.name != '' and len(
                job.inputdata.dataset) != 0:
            raise ApplicationConfigurationError(
                None,
                'Cannot use event pick list and input dataset at the same time.'
            )

        #parametr check for event picking
        if job and job.inputdata and job.inputdata.pick_event_list.name != '':
            if job.inputdata.pick_data_type == '':
                raise ApplicationConfigurationError(
                    None,
                    'Event pick data type (pick_data_type) must be specified.')

        # set X509_USER_PROXY
        from pandatools import Client
        if 'X509_USER_PROXY' not in os.environ or os.environ[
                'X509_USER_PROXY'] == '':
            os.environ['X509_USER_PROXY'] = Client._x509()

        # setup eventLookup
        from pandatools.eventLookupClient import eventLookupClient
        elssiIF = eventLookupClient()
        # open run/event txt
        runEvtList = []
        if os.path.exists(self.pick_event_list.name):
            logger.info("Event pick list file %s selected" %
                        self.pick_event_list.name)
            runevttxt = open(self.pick_event_list.name)
            for line in runevttxt:
                items = line.split()
                if len(items) != 2:
                    continue
                runNr, evtNr = items
                runEvtList.append([runNr, evtNr])
                # close
            runevttxt.close()

        else:
            raise ApplicationConfigurationError(
                None, 'Could not read event pick list file %s.' %
                self.pick_event_list.name)

        # convert self.pick_data_type to Athena stream ref
        if self.pick_data_type == 'AOD':
            streamRef = 'StreamAOD_ref'
        elif self.pick_data_type == 'ESD':
            streamRef = 'StreamESD_ref'
        elif self.pick_data_type == 'RAW':
            streamRef = 'StreamRAW_ref'
        else:
            errStr = 'invalid data type %s for event picking. ' % self.pick_data_type
            errStr += ' Must be one of AOD,ESD,RAW'
            raise ApplicationConfigurationError(None, errStr)
        logger.info(
            'Getting dataset names and LFNs from ELSSI for event picking')

        # read
        guids = []
        guidRunEvtMap = {}
        runEvtGuidMap = {}
        # bulk lookup
        nEventsPerLoop = 500
        iEventsTotal = 0
        while iEventsTotal < len(runEvtList):
            tmpRunEvtList = runEvtList[iEventsTotal:iEventsTotal +
                                       nEventsPerLoop]
            iEventsTotal += nEventsPerLoop
            paramStr = 'Run, Evt: %s, Stream: %s, Dataset pattern: %s' % (
                tmpRunEvtList, self.pick_stream_name,
                self.pick_dataset_pattern)
            logger.debug(paramStr)
            logger.info('.')
            # check with ELSSI
            if self.pick_stream_name == '':
                guidListELSSI = elssiIF.doLookup(tmpRunEvtList,
                                                 tokens=streamRef,
                                                 amitag=self.event_pick_amitag,
                                                 extract=True)
            else:
                guidListELSSI = elssiIF.doLookup(tmpRunEvtList,
                                                 stream=self.pick_stream_name,
                                                 tokens=streamRef,
                                                 amitag=self.event_pick_amitag,
                                                 extract=True)

            if len(guidListELSSI) == 0 or guidListELSSI == None:
                errStr = ''
                for tmpLine in elssiIF.output:
                    errStr += tmpLine + '\n'
                errStr = "GUID was not found in ELSSI.\n" + errStr
                raise ApplicationConfigurationError(None, errStr)

            # check attribute
            attrNames, attrVals = guidListELSSI

            def getAttributeIndex(attr):
                for tmpIdx, tmpAttrName in enumerate(attrNames):
                    if tmpAttrName.strip() == attr:
                        return tmpIdx
                errStr = "cannot find attribute=%s in %s provided by ELSSI" % (
                    attr, str(attrNames))
                raise ApplicationConfigurationError(None, errStr)

            # get index
            indexEvt = getAttributeIndex('EventNumber')
            indexRun = getAttributeIndex('RunNumber')
            indexTag = getAttributeIndex(streamRef)
            # check events
            for runNr, evtNr in tmpRunEvtList:
                paramStr = 'Run:%s Evt:%s Stream:%s' % (runNr, evtNr,
                                                        self.pick_stream_name)
                # collect GUIDs
                tmpguids = []
                for attrVal in attrVals:
                    if runNr == attrVal[indexRun] and evtNr == attrVal[
                            indexEvt]:
                        tmpGuid = attrVal[indexTag]
                        # check non existing
                        if tmpGuid == 'NOATTRIB':
                            continue
                        if not tmpGuid in tmpguids:
                            tmpguids.append(tmpGuid)
                # not found
                if tmpguids == []:
                    errStr = "no GUIDs were found in ELSSI for %s" % paramStr
                    raise ApplicationConfigurationError(None, errStr)
                # append
                for tmpguid in tmpguids:
                    if not tmpguid in guids:
                        guids.append(tmpguid)
                        guidRunEvtMap[tmpguid] = []
                    guidRunEvtMap[tmpguid].append((runNr, evtNr))
                runEvtGuidMap[(runNr, evtNr)] = tmpguids

        # convert to dataset names and LFNs
        dsLFNs, allDSMap = Client.listDatasetsByGUIDs(
            guids, self.pick_dataset_pattern, verbose)
        logger.debug(dsLFNs)

        #populate DQ2Datase
        if job and job.inputdata:
            job.inputdata.dataset = []
            job.inputdata.names = []
            job.inputdata.guids = []

            # check duplication
            for runNr, evtNr in runEvtGuidMap.keys():
                tmpLFNs = []
                tmpAllDSs = {}
                for tmpguid in runEvtGuidMap[(runNr, evtNr)]:
                    if tmpguid in dsLFNs:
                        tmpLFNs.append(dsLFNs[tmpguid])
                        job.inputdata.guids.append(tmpguid)
                        job.inputdata.names.append(dsLFNs[tmpguid][1])
                        if not ((dsLFNs[tmpguid][0]) in job.inputdata.dataset):
                            job.inputdata.dataset.append(dsLFNs[tmpguid][0])
                    else:
                        tmpAllDSs[tmpguid] = allDSMap[tmpguid]
                        if tmpguid in guidRunEvtMap:
                            del guidRunEvtMap[tmpguid]
                # empty
                if tmpLFNs == []:
                    paramStr = 'Run:%s Evt:%s Stream:%s' % (
                        runNr, evtNr, self.pick_stream_name)
                    errStr = "Dataset pattern '%s' didn't pick up a file for %s\n" % (
                        self.pick_dataset_pattern, paramStr)
                    for tmpguid, tmpAllDS in tmpAllDSs.iteritems():
                        errStr += "    GUID:%s dataset:%s\n" % (tmpguid,
                                                                str(tmpAllDS))
                    raise ApplicationConfigurationError(None, errStr)
                # duplicated
                if len(tmpLFNs) != 1:
                    paramStr = 'Run:%s Evt:%s Stream:%s' % (
                        runNr, evtNr, self.pick_stream_name)
                    errStr = "Multiple LFNs %s were found in ELSSI for %s. Please set pick_dataset_pattern and/or pick_stream_name and or event_pick_amitag correctly." % (
                        str(tmpLFNs), paramStr)
                    raise ApplicationConfigurationError(None, errStr)

        # return
        return guidRunEvtMap