Example #1
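`doPostProcess` from a post-processor plugin. After a pre-check, it loops over the task's datasets to delete files that are not in the list of successful files, freezes output/log datasets, and deletes transient datasets; it then pauses the task if duplication is found, deletes the Event Service dataset if one was registered, sends a notification, and finishes with the basic post-processing.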
 def doPostProcess(self,taskSpec,tmpLog):
     # pre-check
     try:
         tmpStat = self.doPreCheck(taskSpec,tmpLog)
         if tmpStat:
             return self.SC_SUCCEEDED
     except Exception:
         errtype,errvalue = sys.exc_info()[:2]
         tmpLog.error('doPreCheck failed with {0}:{1}'.format(errtype.__name__,errvalue))
         return self.SC_FATAL
     # get DDM I/F
     ddmIF = self.ddmIF.getInterface(taskSpec.vo)
     # loop over all datasets
     for datasetSpec in taskSpec.datasetSpecList:
         # skip pseudo output datasets
         if datasetSpec.type in ['output'] and datasetSpec.isPseudo():
             continue
         try:
             # remove wrong files
             if datasetSpec.type in ['output']:
                 # get successful files
                 okFiles = self.taskBufferIF.getSuccessfulFiles_JEDI(datasetSpec.jediTaskID,datasetSpec.datasetID)
                 if okFiles is None:
                     tmpLog.warning('failed to get successful files for {0}'.format(datasetSpec.datasetName))
                     return self.SC_FAILED
                 # get files in dataset
                 ddmFiles = ddmIF.getFilesInDataset(datasetSpec.datasetName,skipDuplicate=False,ignoreUnknown=True)
                 tmpLog.debug('datasetID={0}:Name={1} has {2} files in DB, {3} files in DDM'.format(datasetSpec.datasetID,
                                                                                                   datasetSpec.datasetName,
                                                                                                   len(okFiles),len(ddmFiles)))
                 # check all files
                 toDelete = []
                 for tmpGUID,attMap in iteritems(ddmFiles):
                     if attMap['lfn'] not in okFiles:
                         did = {'scope':attMap['scope'], 'name':attMap['lfn']}
                         toDelete.append(did)
                         tmpLog.debug('delete {0} from {1}'.format(attMap['lfn'],datasetSpec.datasetName))
                 # delete
                 if toDelete != []:
                     ddmIF.deleteFilesFromDataset(datasetSpec.datasetName,toDelete)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to remove wrong files with {0}:{1}'.format(errtype.__name__,errvalue))
             return self.SC_FAILED
         try:
             # freeze output and log datasets
             if datasetSpec.type in ['output','log','trn_log']:
                 tmpLog.info('freezing datasetID={0}:Name={1}'.format(datasetSpec.datasetID,datasetSpec.datasetName))
                 ddmIF.freezeDataset(datasetSpec.datasetName,ignoreUnknown=True)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to freeze datasets with {0}:{1}'.format(errtype.__name__,errvalue))
             return self.SC_FAILED
         try:
             # delete transient datasets
             if datasetSpec.type in ['trn_output']:
                 tmpLog.debug('deleting datasetID={0}:Name={1}'.format(datasetSpec.datasetID,datasetSpec.datasetName))
                 retStr = ddmIF.deleteDataset(datasetSpec.datasetName,False,ignoreUnknown=True)
                 tmpLog.info(retStr)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to delete datasets with {0}:{1}'.format(errtype.__name__,errvalue))
     # check duplication
     if self.getFinalTaskStatus(taskSpec) in ['finished','done'] and taskSpec.gshare != 'Test':
         nDup = self.taskBufferIF.checkDuplication_JEDI(taskSpec.jediTaskID)
         tmpLog.debug('checked duplication with {0}'.format(nDup))
         if nDup > 0:
             errStr = 'paused since {0} duplication found'.format(nDup)
             taskSpec.oldStatus = self.getFinalTaskStatus(taskSpec)
             taskSpec.status = 'paused'
             taskSpec.setErrDiag(errStr)
             tmpLog.debug(errStr)
     # delete ES datasets
     if taskSpec.registerEsFiles():
         try:
             targetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
             tmpLog.debug('deleting ES dataset name={0}'.format(targetName))
             retStr = ddmIF.deleteDataset(targetName,False,ignoreUnknown=True)
             tmpLog.debug(retStr)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to delete ES dataset with {0}:{1}'.format(errtype.__name__,errvalue))
     try:
         AtlasPostProcessorUtils.send_notification(self.taskBufferIF, ddmIF, taskSpec, tmpLog)
     except Exception as e:
         tmpLog.error('failed to talk to external system with {0}'.format(str(e)))
         return self.SC_FAILED
     try:
         self.doBasicPostProcess(taskSpec,tmpLog)
     except Exception:
         errtype,errvalue = sys.exc_info()[:2]
         tmpLog.error('doBasicPostProcess failed with {0}:{1}'.format(errtype.__name__,errvalue))
         return self.SC_FATAL
     return self.SC_SUCCEEDED
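The cleanup step boils down to comparing the files the task database reports as successful (`okFiles`) with the files DDM actually holds (`ddmFiles`), and collecting the DIDs of the strays. A minimal, self-contained sketch of that comparison, using made-up stand-in values for what `getSuccessfulFiles_JEDI` and `getFilesInDataset` return:

# Hypothetical stand-ins for the values returned by
# taskBufferIF.getSuccessfulFiles_JEDI and ddmIF.getFilesInDataset.
okFiles = {'EVNT.01._000001.root', 'EVNT.01._000002.root'}
ddmFiles = {
    'guid-1': {'scope': 'mc16_13TeV', 'lfn': 'EVNT.01._000001.root'},
    'guid-2': {'scope': 'mc16_13TeV', 'lfn': 'EVNT.01._000002.root'},
    'guid-3': {'scope': 'mc16_13TeV', 'lfn': 'EVNT.01._000003.root'},  # not recorded as successful
}

# Collect DIDs (scope + name) for files registered in DDM but not
# recorded as successful in the task database.
toDelete = [
    {'scope': attMap['scope'], 'name': attMap['lfn']}
    for attMap in ddmFiles.values()
    if attMap['lfn'] not in okFiles
]
print(toDelete)  # [{'scope': 'mc16_13TeV', 'name': 'EVNT.01._000003.root'}]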
Example #2
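An earlier revision of a `doSetup` method. It registers the output/log datasets and containers collected in `datasetToRegister` with DDM, picking a location, lifetime, and metadata per target, registers replication rules for user tasks and distributed destinations (including an optional SCRATCHDISK double copy), adds each dataset to its container, registers a hidden Event Service dataset when needed, and re-opens the output datasets of production tasks.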
 def doSetup(self,taskSpec,datasetToRegister,pandaJobs):
     # make logger
     tmpLog = MsgWrapper(logger,"<jediTaskID={0}>".format(taskSpec.jediTaskID))
     tmpLog.info('start label={0} taskType={1}'.format(taskSpec.prodSourceLabel,taskSpec.taskType))
     # returns
     retFatal    = self.SC_FATAL
     retTmpError = self.SC_FAILED
     retOK       = self.SC_SUCCEEDED
     try:
         # get DDM I/F
         ddmIF = self.ddmIF.getInterface(taskSpec.vo)
         # register datasets
         if datasetToRegister != [] or taskSpec.prodSourceLabel in ['user']:
             # prod vs anal
             userSetup = False
             if taskSpec.prodSourceLabel in ['user']:
                 userSetup = True
                 # collect datasetID to register datasets/containers just in case
                 for tmpPandaJob in pandaJobs:
                     if not tmpPandaJob.produceUnMerge():
                         for tmpFileSpec in tmpPandaJob.Files:
                             if tmpFileSpec.type in ['output','log']:
                                 if tmpFileSpec.datasetID not in datasetToRegister:
                                     datasetToRegister.append(tmpFileSpec.datasetID)
             tmpLog.info('datasetToRegister={0}'.format(str(datasetToRegister)))
             # get site mapper
             siteMapper = self.taskBufferIF.getSiteMapper()
             # loop over all datasets
             avDatasetList = []
             cnDatasetMap  = {}
             for datasetID in datasetToRegister:
                 # get output and log datasets
                 tmpLog.info('getting datasetSpec with datasetID={0}'.format(datasetID))
                 tmpStat,datasetSpec = self.taskBufferIF.getDatasetWithID_JEDI(taskSpec.jediTaskID,
                                                                               datasetID)
                 if not tmpStat:
                     tmpLog.error('failed to get output and log datasets')
                     return retFatal
                 if datasetSpec.isPseudo():
                     tmpLog.info('skip pseudo dataset')
                     continue
                 # DDM backend
                 ddmBackEnd = taskSpec.getDdmBackEnd()
                 tmpLog.info('checking {0}'.format(datasetSpec.datasetName)) 
                 # check if dataset and container are available in DDM
                 for targetName in [datasetSpec.datasetName,datasetSpec.containerName]:
                     if targetName is None:
                         continue
                     if targetName not in avDatasetList:
                         # set lifetime
                         if targetName.startswith('panda'):
                             if datasetSpec.type == 'trn_log' and taskSpec.prodSourceLabel == 'managed':
                                 lifetime = 365
                             else:
                                 lifetime = 14
                         else:
                             lifetime = None
                         # check dataset/container in DDM
                         tmpList = ddmIF.listDatasets(targetName)
                         if tmpList == []:
                             # get location
                             location = None
                             locForRule = None
                             if targetName == datasetSpec.datasetName:
                                 # dataset
                                 if datasetSpec.site in ['',None]:
                                     if DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                         locForRule = datasetSpec.destination
                                     elif DataServiceUtils.getDestinationSE(datasetSpec.storageToken) is not None:
                                         location = DataServiceUtils.getDestinationSE(datasetSpec.storageToken)
                                     elif taskSpec.cloud is not None:
                                         # use T1 SE
                                         tmpT1Name = siteMapper.getCloud(taskSpec.cloud)['source']
                                         location = siteMapper.getDdmEndpoint(tmpT1Name,datasetSpec.storageToken)
                                 else:
                                     tmpLog.info('site={0} token={1}'.format(datasetSpec.site,datasetSpec.storageToken))
                                     location = siteMapper.getDdmEndpoint(datasetSpec.site,datasetSpec.storageToken)
                             if locForRule is None:
                                 locForRule = location
                             # set metadata
                             if taskSpec.prodSourceLabel in ['managed','test'] and targetName == datasetSpec.datasetName:
                                 metaData = {}
                                 metaData['task_id'] = taskSpec.jediTaskID
                                 if taskSpec.campaign not in [None,'']:
                                     metaData['campaign'] = taskSpec.campaign
                                 if datasetSpec.getTransient() is not None:
                                     metaData['transient'] = datasetSpec.getTransient()
                             else:
                                 metaData = None
                             # register dataset/container
                             tmpLog.info('registering {0} with location={1} backend={2} lifetime={3} meta={4}'.format(targetName,
                                                                                                                      location,
                                                                                                                      ddmBackEnd,
                                                                                                                      lifetime,
                                                                                                                      str(metaData)))
                             tmpStat = ddmIF.registerNewDataset(targetName,backEnd=ddmBackEnd,location=location,
                                                                lifetime=lifetime,metaData=metaData)
                             if not tmpStat:
                                 tmpLog.error('failed to register {0}'.format(targetName))
                                 return retFatal
                             # procedures for user 
                             if userSetup or DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                 # register location
                                 tmpToRegister = False
                                 if userSetup and targetName == datasetSpec.datasetName and datasetSpec.site not in ['',None]:
                                     userName = taskSpec.userName
                                     grouping = None
                                     tmpToRegister = True
                                 elif DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                     userName = None
                                     grouping = 'NONE'
                                     tmpToRegister = True
                                 if tmpToRegister:
                                     activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
                                     tmpLog.info('registering location={0} lifetime={1}days activity={2} grouping={3}'.format(locForRule,lifetime,
                                                                                                                              activity,grouping))
                                     tmpStat = ddmIF.registerDatasetLocation(targetName,locForRule,owner=userName,
                                                                             lifetime=lifetime,backEnd=ddmBackEnd,
                                                                             activity=activity,grouping=grouping)
                                     if not tmpStat:
                                         tmpLog.error('failed to register location {0} for {1}'.format(locForRule,
                                                                                                       targetName))
                                         return retFatal
                                     # double copy
                                     if userSetup and datasetSpec.type == 'output':
                                         if datasetSpec.destination != datasetSpec.site:
                                             tmpLog.info('skip making double copy as destination={0} is not site={1}'.format(datasetSpec.destination,
                                                                                                                             datasetSpec.site))
                                         else:
                                             locForDouble = '(type=SCRATCHDISK)\\notforextracopy=1'
                                             tmpMsg  = 'registering double copy '
                                             tmpMsg += 'location="{0}" lifetime={1}days activity={2} for dataset={3}'.format(locForDouble,lifetime,
                                                                                                                             activity,targetName)
                                             tmpLog.info(tmpMsg)
                                             tmpStat = ddmIF.registerDatasetLocation(targetName,locForDouble,copies=2,owner=userName,
                                                                                     lifetime=lifetime,activity=activity,
                                                                                     grouping='NONE',weight='freespace',
                                                                                     ignore_availability=False)
                                             if not tmpStat:
                                                 tmpLog.error('failed to register double copy location {0} for {1}'.format(locForDouble,
                                                                                                                            targetName))
                                                 return retFatal
                             avDatasetList.append(targetName)
                         else:
                             tmpLog.info('{0} already registered'.format(targetName))
                 # check if dataset is in the container
                 if datasetSpec.containerName is not None and datasetSpec.containerName != datasetSpec.datasetName:
                     # get list of constituent datasets in the container
                     if datasetSpec.containerName not in cnDatasetMap:
                         cnDatasetMap[datasetSpec.containerName] = ddmIF.listDatasetsInContainer(datasetSpec.containerName)
                     # add dataset
                     if datasetSpec.datasetName not in cnDatasetMap[datasetSpec.containerName]:
                         tmpLog.info('adding {0} to {1}'.format(datasetSpec.datasetName,datasetSpec.containerName)) 
                         tmpStat = ddmIF.addDatasetsToContainer(datasetSpec.containerName,[datasetSpec.datasetName],
                                                                backEnd=ddmBackEnd)
                         if not tmpStat:
                             tmpLog.error('failed to add {0} to {1}'.format(datasetSpec.datasetName,
                                                                            datasetSpec.containerName))
                             return retFatal
                         cnDatasetMap[datasetSpec.containerName].append(datasetSpec.datasetName)
                     else:
                         tmpLog.info('{0} already in {1}'.format(datasetSpec.datasetName,datasetSpec.containerName)) 
                 # update dataset
                 datasetSpec.status = 'registered'
                 self.taskBufferIF.updateDataset_JEDI(datasetSpec,{'jediTaskID':taskSpec.jediTaskID,
                                                                   'datasetID':datasetID})
         # register ES datasets
         if taskSpec.registerEsFiles():
             targetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
             location = None
             metaData = {}
             metaData['task_id'] = taskSpec.jediTaskID
             metaData['hidden']  = True
             tmpLog.info('registering ES dataset {0} with location={1} meta={2}'.format(targetName,
                                                                                        location,
                                                                                        str(metaData)))
             tmpStat = ddmIF.registerNewDataset(targetName,location=location,metaData=metaData,
                                                resurrect=True)
             if not tmpStat:
                 tmpLog.error('failed to register ES dataset {0}'.format(targetName))
                 return retFatal
             # register rule
             location = 'type=DATADISK' 
             activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
             grouping = 'NONE'
             tmpLog.info('registering location={0} activity={1} grouping={2}'.format(location,
                                                                                     activity,
                                                                                     grouping))
             tmpStat = ddmIF.registerDatasetLocation(targetName,location,activity=activity,
                                                     grouping=grouping)
             if not tmpStat:
                 tmpLog.error('failed to register location {0} with {2} for {1}'.format(location,
                                                                                        targetName,
                                                                                        activity))
                 return retFatal
         # open datasets
         if taskSpec.prodSourceLabel in ['managed','test']:
             # get the list of output/log datasets
             outDatasetList = []
             for tmpPandaJob in pandaJobs:
                 for tmpFileSpec in tmpPandaJob.Files:
                     if tmpFileSpec.type in ['output','log']:
                         if not tmpFileSpec.destinationDBlock in outDatasetList:
                             outDatasetList.append(tmpFileSpec.destinationDBlock)
             # open datasets
             for outDataset in outDatasetList:
                 tmpLog.info('open {0}'.format(outDataset))
                 ddmIF.openDataset(outDataset)
                 # unset lifetime
                 ddmIF.setDatasetMetadata(outDataset,'lifetime',None)
         # return
         tmpLog.info('done')        
         return retOK
     except Exception:
         errtype,errvalue = sys.exc_info()[:2]
         tmpLog.error('doSetup failed with {0}:{1}'.format(errtype.__name__,errvalue))
         taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
         return retFatal
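The lifetime logic near the top of the registration loop is an isolated rule: only `panda`-prefixed (temporary) dataset names get a finite lifetime, with production transient logs kept longer. A sketch of it factored into a pure function; the function name and signature are illustrative, not part of the original code:

def choose_lifetime(target_name, dataset_type, prod_source_label):
    """Hypothetical extraction of the lifetime rule used in doSetup.

    panda-prefixed (temporary) datasets expire; datasets registered
    under other names get no lifetime here.
    """
    if not target_name.startswith('panda'):
        return None  # no lifetime cap for regular datasets
    if dataset_type == 'trn_log' and prod_source_label == 'managed':
        return 365  # production transient logs are kept a year
    return 14  # other temporary datasets live two weeks


# usage mirroring the branches above
assert choose_lifetime('panda.0123.log', 'trn_log', 'managed') == 365
assert choose_lifetime('panda.0123.out', 'output', 'user') == 14
assert choose_lifetime('user.alice.dataset', 'output', 'user') is None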
Example #3
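A later revision of the same `doSetup`. The flow is unchanged, but `getDdmEndpoint` now also receives the source label and the job type translated from the task type, the rule owner prefers `taskSpec.workingGroup` over `taskSpec.userName`, and the SCRATCHDISK double copy is skipped when the PanDA site's `catchall` field contains `skip_2nd_copy`.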
    def doSetup(self,taskSpec,datasetToRegister,pandaJobs):
        # make logger
        tmpLog = MsgWrapper(logger,"< jediTaskID={0} >".format(taskSpec.jediTaskID))
        tmpLog.info('start label={0} taskType={1}'.format(taskSpec.prodSourceLabel,taskSpec.taskType))
        # returns
        retFatal    = self.SC_FATAL
        retTmpError = self.SC_FAILED
        retOK       = self.SC_SUCCEEDED
        try:
            # get DDM I/F
            ddmIF = self.ddmIF.getInterface(taskSpec.vo)
            # register datasets
            if datasetToRegister != [] or taskSpec.prodSourceLabel in ['user']:
                # prod vs anal
                userSetup = False
                if taskSpec.prodSourceLabel in ['user']:
                    userSetup = True
                    # collect datasetID to register datasets/containers just in case
                    for tmpPandaJob in pandaJobs:
                        if not tmpPandaJob.produceUnMerge():
                            for tmpFileSpec in tmpPandaJob.Files:
                                if tmpFileSpec.type in ['output','log']:
                                    if tmpFileSpec.datasetID not in datasetToRegister:
                                        datasetToRegister.append(tmpFileSpec.datasetID)
                tmpLog.info('datasetToRegister={0}'.format(str(datasetToRegister)))
                # get site mapper
                siteMapper = self.taskBufferIF.getSiteMapper()

                # loop over all datasets
                avDatasetList = []
                cnDatasetMap  = {}
                for datasetID in datasetToRegister:
                    # get output and log datasets
                    tmpLog.info('getting datasetSpec with datasetID={0}'.format(datasetID))
                    tmpStat,datasetSpec = self.taskBufferIF.getDatasetWithID_JEDI(taskSpec.jediTaskID,
                                                                                  datasetID)
                    if not tmpStat:
                        tmpLog.error('failed to get output and log datasets')
                        return retFatal
                    if datasetSpec.isPseudo():
                        tmpLog.info('skip pseudo dataset')
                        continue
                    # DDM backend
                    ddmBackEnd = taskSpec.getDdmBackEnd()
                    tmpLog.info('checking {0}'.format(datasetSpec.datasetName))
                    # check if dataset and container are available in DDM
                    for targetName in [datasetSpec.datasetName,datasetSpec.containerName]:
                        if targetName is None:
                            continue
                        if targetName not in avDatasetList:
                            # set lifetime
                            if targetName.startswith('panda'):
                                if datasetSpec.type == 'trn_log' and taskSpec.prodSourceLabel == 'managed':
                                    lifetime = 365
                                else:
                                    lifetime = 14
                            else:
                                lifetime = None
                            # check dataset/container in DDM
                            tmpList = ddmIF.listDatasets(targetName)
                            if tmpList == []:
                                # get location
                                location = None
                                locForRule = None
                                if targetName == datasetSpec.datasetName:
                                    # dataset
                                    if datasetSpec.site in ['',None]:
                                        if DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                            locForRule = datasetSpec.destination
                                        elif DataServiceUtils.getDestinationSE(datasetSpec.storageToken) is not None:
                                            location = DataServiceUtils.getDestinationSE(datasetSpec.storageToken)
                                        elif taskSpec.cloud is not None:
                                            # use T1 SE
                                            tmpT1Name = siteMapper.getCloud(taskSpec.cloud)['source']
                                            location = siteMapper.getDdmEndpoint(tmpT1Name, datasetSpec.storageToken,
                                                                                 taskSpec.prodSourceLabel,
                                                                                 JobUtils.translate_tasktype_to_jobtype(taskSpec.taskType))
                                    else:
                                        tmpLog.info('site={0} token={1}'.format(datasetSpec.site, datasetSpec.storageToken))
                                        location = siteMapper.getDdmEndpoint(datasetSpec.site,datasetSpec.storageToken,
                                                                             taskSpec.prodSourceLabel,
                                                                             JobUtils.translate_tasktype_to_jobtype(taskSpec.taskType))
                                if locForRule is None:
                                    locForRule = location
                                # set metadata
                                if taskSpec.prodSourceLabel in ['managed','test'] and targetName == datasetSpec.datasetName:
                                    metaData = {}
                                    metaData['task_id'] = taskSpec.jediTaskID
                                    if taskSpec.campaign not in [None,'']:
                                        metaData['campaign'] = taskSpec.campaign
                                    if datasetSpec.getTransient() is not None:
                                        metaData['transient'] = datasetSpec.getTransient()
                                else:
                                    metaData = None
                                # register dataset/container
                                tmpLog.info('registering {0} with location={1} backend={2} lifetime={3} meta={4}'.format(targetName,
                                                                                                                         location,
                                                                                                                         ddmBackEnd,
                                                                                                                         lifetime,
                                                                                                                         str(metaData)))
                                tmpStat = ddmIF.registerNewDataset(targetName,backEnd=ddmBackEnd,location=location,
                                                                   lifetime=lifetime,metaData=metaData)
                                if not tmpStat:
                                    tmpLog.error('failed to register {0}'.format(targetName))
                                    return retFatal
                                # procedures for user
                                if userSetup or DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                    # register location
                                    tmpToRegister = False
                                    if userSetup and targetName == datasetSpec.datasetName and datasetSpec.site not in ['',None]:
                                        if taskSpec.workingGroup:
                                            userName = taskSpec.workingGroup
                                        else:
                                            userName = taskSpec.userName
                                        grouping = None
                                        tmpToRegister = True
                                    elif DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                                        userName = None
                                        grouping = 'NONE'
                                        tmpToRegister = True
                                    if tmpToRegister:
                                        activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
                                        tmpLog.info('registering location={} lifetime={} days activity={} grouping={} '
                                                    'owner={}'.format(locForRule, lifetime, activity, grouping,
                                                                      userName))
                                        tmpStat = ddmIF.registerDatasetLocation(targetName,locForRule,owner=userName,
                                                                                lifetime=lifetime,backEnd=ddmBackEnd,
                                                                                activity=activity,grouping=grouping)
                                        if not tmpStat:
                                            tmpLog.error('failed to register location {0} for {1}'.format(locForRule,
                                                                                                          targetName))
                                            return retFatal
                                        # double copy
                                        if userSetup and datasetSpec.type == 'output':
                                            if datasetSpec.destination != datasetSpec.site:
                                                tmpLog.info('skip making double copy as destination={0} is not site={1}'.format(datasetSpec.destination,
                                                                                                                                datasetSpec.site))
                                            else:

                                                second_copy = True
                                                try:
                                                    if taskSpec.site:
                                                        panda_site = siteMapper.getSite(taskSpec.site)
                                                        if panda_site.catchall and 'skip_2nd_copy' in panda_site.catchall:
                                                            tmpLog.info('skip making double copy as specified in {0} catchall'.format(panda_site))
                                                            second_copy = False
                                                except Exception:
                                                    second_copy = True

                                                if second_copy:
                                                    locForDouble = '(type=SCRATCHDISK)\\notforextracopy=True'
                                                    tmpMsg  = 'registering double copy '
                                                    tmpMsg += 'location="{0}" lifetime={1}days activity={2} for dataset={3}'.format(locForDouble,lifetime,
                                                                                                                                    activity,targetName)
                                                    tmpLog.info(tmpMsg)
                                                    tmpStat = ddmIF.registerDatasetLocation(targetName,locForDouble,copies=2,owner=userName,
                                                                                            lifetime=lifetime,activity=activity,
                                                                                            grouping='NONE',weight='freespace',
                                                                                            ignore_availability=False)
                                                    if not tmpStat:
                                                        tmpLog.error('failed to register double copy location {0} for {1}'.format(locForDouble,
                                                                                                                                   targetName))
                                                        return retFatal
                                avDatasetList.append(targetName)
                            else:
                                tmpLog.info('{0} already registered'.format(targetName))
                    # check if dataset is in the container
                    if datasetSpec.containerName is not None and datasetSpec.containerName != datasetSpec.datasetName:
                        # get list of constituent datasets in the container
                        if datasetSpec.containerName not in cnDatasetMap:
                            cnDatasetMap[datasetSpec.containerName] = ddmIF.listDatasetsInContainer(datasetSpec.containerName)
                        # add dataset
                        if datasetSpec.datasetName not in cnDatasetMap[datasetSpec.containerName]:
                            tmpLog.info('adding {0} to {1}'.format(datasetSpec.datasetName,datasetSpec.containerName))
                            tmpStat = ddmIF.addDatasetsToContainer(datasetSpec.containerName,[datasetSpec.datasetName],
                                                                   backEnd=ddmBackEnd)
                            if not tmpStat:
                                tmpLog.error('failed to add {0} to {1}'.format(datasetSpec.datasetName,
                                                                               datasetSpec.containerName))
                                return retFatal
                            cnDatasetMap[datasetSpec.containerName].append(datasetSpec.datasetName)
                        else:
                            tmpLog.info('{0} already in {1}'.format(datasetSpec.datasetName,datasetSpec.containerName))
                    # update dataset
                    datasetSpec.status = 'registered'
                    self.taskBufferIF.updateDataset_JEDI(datasetSpec,{'jediTaskID':taskSpec.jediTaskID,
                                                                      'datasetID':datasetID})
            # register ES datasets
            if taskSpec.registerEsFiles():
                targetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
                location = None
                metaData = {}
                metaData['task_id'] = taskSpec.jediTaskID
                metaData['hidden']  = True
                tmpLog.info('registering ES dataset {0} with location={1} meta={2}'.format(targetName,
                                                                                           location,
                                                                                           str(metaData)))
                tmpStat = ddmIF.registerNewDataset(targetName,location=location,metaData=metaData,
                                                   resurrect=True)
                if not tmpStat:
                    tmpLog.error('failed to register ES dataset {0}'.format(targetName))
                    return retFatal
                # register rule
                location = 'type=DATADISK'
                activity = DataServiceUtils.getActivityForOut(taskSpec.prodSourceLabel)
                grouping = 'NONE'
                tmpLog.info('registering location={0} activity={1} grouping={2}'.format(location,
                                                                                        activity,
                                                                                        grouping))
                tmpStat = ddmIF.registerDatasetLocation(targetName,location,activity=activity,
                                                        grouping=grouping)
                if not tmpStat:
                    tmpLog.error('failed to register location {0} with {2} for {1}'.format(location,
                                                                                           targetName,
                                                                                           activity))
                    return retFatal
            # open datasets
            if taskSpec.prodSourceLabel in ['managed','test']:
                # get the list of output/log datasets
                outDatasetList = []
                for tmpPandaJob in pandaJobs:
                    for tmpFileSpec in tmpPandaJob.Files:
                        if tmpFileSpec.type in ['output','log']:
                            if tmpFileSpec.destinationDBlock not in outDatasetList:
                                outDatasetList.append(tmpFileSpec.destinationDBlock)
                # open datasets
                for outDataset in outDatasetList:
                    tmpLog.info('open {0}'.format(outDataset))
                    ddmIF.openDataset(outDataset)
                    # unset lifetime
                    ddmIF.setDatasetMetadata(outDataset,'lifetime',None)
            # return
            tmpLog.info('done')
            return retOK
        except Exception:
            errtype,errvalue = sys.exc_info()[:2]
            tmpLog.error('doSetup failed with {0}:{1}'.format(errtype.__name__,errvalue))
            taskSpec.setErrDiag(tmpLog.uploadLog(taskSpec.jediTaskID))
            return retFatal
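`cnDatasetMap` acts as a per-call cache of container contents: the DDM listing is fetched once per container, then extended locally as datasets are attached. The same pattern in isolation, with a hypothetical stub standing in for `ddmIF.listDatasetsInContainer`:

# Hypothetical stub for the DDM call; the real code queries Rucio.
def list_datasets_in_container(container):
    print('DDM lookup for {0}'.format(container))
    return ['cont/dsA']

cnDatasetMap = {}

def ensure_in_container(container, dataset):
    # fetch the container content once, then rely on the local cache
    if container not in cnDatasetMap:
        cnDatasetMap[container] = list_datasets_in_container(container)
    if dataset not in cnDatasetMap[container]:
        # the real code calls ddmIF.addDatasetsToContainer here
        cnDatasetMap[container].append(dataset)

ensure_in_container('cont', 'cont/dsA')  # triggers one DDM lookup
ensure_in_container('cont', 'cont/dsB')  # served from the cache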
Example #4
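An earlier revision of the `doPostProcess` in Example #1. The cleanup, freeze, and delete steps are the same, but the duplication check runs regardless of the task's gshare and there is no notification step.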
 def doPostProcess(self,taskSpec,tmpLog):
     # pre-check
     try:
         tmpStat = self.doPreCheck(taskSpec,tmpLog)
         if tmpStat:
             return self.SC_SUCCEEDED
     except Exception:
         errtype,errvalue = sys.exc_info()[:2]
         tmpLog.error('doPreCheck failed with {0}:{1}'.format(errtype.__name__,errvalue))
         return self.SC_FATAL
     # get DDM I/F
     ddmIF = self.ddmIF.getInterface(taskSpec.vo)
     # loop over all datasets
     for datasetSpec in taskSpec.datasetSpecList:
         # skip pseudo output datasets
         if datasetSpec.type in ['output'] and datasetSpec.isPseudo():
             continue
         try:
             # remove wrong files
             if datasetSpec.type in ['output']:
                 # get successful files
                 okFiles = self.taskBufferIF.getSuccessfulFiles_JEDI(datasetSpec.jediTaskID,datasetSpec.datasetID)
                 if okFiles is None:
                     tmpLog.warning('failed to get successful files for {0}'.format(datasetSpec.datasetName))
                     return self.SC_FAILED
                 # get files in dataset
                 ddmFiles = ddmIF.getFilesInDataset(datasetSpec.datasetName,skipDuplicate=False,ignoreUnknown=True)
                 tmpLog.debug('datasetID={0}:Name={1} has {2} files in DB, {3} files in DDM'.format(datasetSpec.datasetID,
                                                                                                   datasetSpec.datasetName,
                                                                                                   len(okFiles),len(ddmFiles)))
                 # check all files
                 toDelete = []
                 for tmpGUID,attMap in ddmFiles.items():
                     if attMap['lfn'] not in okFiles:
                         did = {'scope':attMap['scope'], 'name':attMap['lfn']}
                         toDelete.append(did)
                         tmpLog.debug('delete {0} from {1}'.format(attMap['lfn'],datasetSpec.datasetName))
                 # delete
                 if toDelete != []:
                     ddmIF.deleteFilesFromDataset(datasetSpec.datasetName,toDelete)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to remove wrong files with {0}:{1}'.format(errtype.__name__,errvalue))
             return self.SC_FAILED
         try:
             # freeze output and log datasets
             if datasetSpec.type in ['output','log','trn_log']:
                 tmpLog.info('freezing datasetID={0}:Name={1}'.format(datasetSpec.datasetID,datasetSpec.datasetName))
                 ddmIF.freezeDataset(datasetSpec.datasetName,ignoreUnknown=True)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to freeze datasets with {0}:{1}'.format(errtype.__name__,errvalue))
             return self.SC_FAILED
         try:
             # delete transient datasets
             if datasetSpec.type in ['trn_output']:
                 tmpLog.debug('deleting datasetID={0}:Name={1}'.format(datasetSpec.datasetID,datasetSpec.datasetName))
                 retStr = ddmIF.deleteDataset(datasetSpec.datasetName,False,ignoreUnknown=True)
                 tmpLog.info(retStr)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to delete datasets with {0}:{1}'.format(errtype.__name__,errvalue))
     # check duplication
     if self.getFinalTaskStatus(taskSpec) in ['finished','done']:
         nDup = self.taskBufferIF.checkDuplication_JEDI(taskSpec.jediTaskID)
         tmpLog.debug('checked duplication with {0}'.format(nDup))
         if nDup > 0:
             errStr = 'paused since {0} duplication found'.format(nDup)
             taskSpec.oldStatus = self.getFinalTaskStatus(taskSpec)
             taskSpec.status = 'paused'
             taskSpec.setErrDiag(errStr)
             tmpLog.debug(errStr)
     # delete ES datasets
     if taskSpec.registerEsFiles():
         try:
             targetName = EventServiceUtils.getEsDatasetName(taskSpec.jediTaskID)
             tmpLog.debug('deleting ES dataset name={0}'.format(targetName))
             retStr = ddmIF.deleteDataset(targetName,False,ignoreUnknown=True)
             tmpLog.debug(retStr)
         except Exception:
             errtype,errvalue = sys.exc_info()[:2]
             tmpLog.warning('failed to delete ES dataset with {0}:{1}'.format(errtype.__name__,errvalue))
     try:
         self.doBasicPostProcess(taskSpec,tmpLog)
     except Exception:
         errtype,errvalue = sys.exc_info()[:2]
         tmpLog.error('doBasicPostProcess failed with {0}:{1}'.format(errtype.__name__,errvalue))
         return self.SC_FATAL
     return self.SC_SUCCEEDED
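The examples capture errors with `sys.exc_info()` rather than binding the exception in the `except` clause, which works identically on Python 2 and 3 and yields the type name and value for the log message. A minimal standalone illustration of the pattern:

import sys

def risky():
    raise ValueError('bad dataset name')

try:
    risky()
except Exception:
    # sys.exc_info() returns (type, value, traceback); the plugin code
    # slices off the first two to format its log messages
    errtype, errvalue = sys.exc_info()[:2]
    print('risky failed with {0}:{1}'.format(errtype.__name__, errvalue))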