def _setUpworkloadCE(self):
    """Get application queue and configure it

    Reads the site, CE and queue names from the ``/LocalSite`` section of the
    configuration, looks up the queue description, builds the matching
    computing element and attaches the current proxy (with its remaining
    lifetime in seconds) to it.

    :return: S_OK(ComputingElement instance) on success. If the queue lookup,
             the CE instantiation, the proxy-info lookup or the lifetime query
             fails, the failing call's result is returned unchanged. S_ERROR
             is returned when the proxy info holds no usable chain.
    """
    # Get CE parameters
    workloadSite = gConfig.getValue("/LocalSite/Site")
    workloadCE = gConfig.getValue("/LocalSite/GridCE")
    workloadQueue = gConfig.getValue("/LocalSite/CEQueue")

    result = getQueue(workloadSite, workloadCE, workloadQueue)
    if not result["OK"]:
        return result
    ceParams = result["Value"]
    ceType = ceParams["CEType"]

    # Build CE
    ceFactory = ComputingElementFactory()
    result = ceFactory.getCE(ceName=workloadCE, ceType=ceType, ceParametersDict=ceParams)
    if not result["OK"]:
        return result
    workloadCE = result["Value"]

    # Add a proxy to the CE.
    # The two failure modes are checked separately: a failed call must be
    # returned before its "Value" is touched, and a successful call without a
    # chain must not reach proxy.getRemainingSecs().
    result = getProxyInfo()
    if not result["OK"]:
        return result
    proxy = result["Value"].get("chain")
    if not proxy:
        return S_ERROR("No proxy chain found in the proxy info")

    result = proxy.getRemainingSecs()
    if not result["OK"]:
        return result
    lifetime_secs = result["Value"]
    workloadCE.setProxy(proxy, lifetime_secs)

    return S_OK(workloadCE)
def export_killPilot(self, pilotRefList ):
    """ Kill the pilots named in pilotRefList.

        pilotRefList is either a single pilot reference (a string) or an
        iterable of references. The pilots are grouped by owner, group, grid
        site, destination site and queue; each group is then killed through a
        computing element built for its queue.

        Returns S_OK() when every killJob call succeeded. A failed pilot-info
        lookup, queue lookup, CE instantiation or proxy retrieval aborts
        immediately with an error structure. If at least one killJob call
        failed, S_ERROR is returned after all groups have been processed.
    """
    # A bare string is one reference; anything else is copied into a list
    if type( pilotRefList ) in StringTypes:
        references = [pilotRefList]
    else:
        references = list( pilotRefList )

    # Regroup pilots per site and per owner
    groups = {}
    for reference in references:
        infoResult = pilotDB.getPilotInfo( reference )
        if not ( infoResult['OK'] and infoResult['Value'] ):
            return S_ERROR('Failed to get info for pilot ' + reference)

        info = infoResult['Value'][reference]
        ownerDN = info['OwnerDN']
        ownerGroup = info['OwnerGroup']
        groupKey = '@@@'.join( [ownerDN,
                                ownerGroup,
                                info['GridSite'],
                                info['DestinationSite'],
                                info['Queue']] )
        gridType = info['GridType']
        entry = groups.setdefault( groupKey, {} )
        entry.setdefault( 'PilotList', [] ).append( reference )
        entry['GridType'] = gridType

    # Do the work now queue by queue
    ceFactory = ComputingElementFactory()
    notKilled = []
    for groupKey, entry in groups.items():
        ownerDN, ownerGroup, site, ceName, queueName = groupKey.split( '@@@' )

        queueResult = getQueue( site, ceName, queueName )
        if not queueResult['OK']:
            return queueResult
        queueDict = queueResult['Value']
        gridType = entry['GridType']

        ceResult = ceFactory.getCE( gridType, ceName, queueDict )
        if not ceResult['OK']:
            return ceResult
        computingElement = ceResult['Value']

        # Only these grid flavours get the pilot owner's proxy attached
        if gridType in ( "LCG", "gLite", "CREAM" ):
            vomsGroup = getGroupOption( ownerGroup, 'VOMSRole', ownerGroup )
            proxyResult = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, vomsGroup )
            if not proxyResult['OK']:
                gLogger.error( proxyResult['Message'] )
                gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, vomsGroup ) )
                return S_ERROR("Failed to get the pilot's owner proxy")
            computingElement.setProxy( proxyResult['Value'] )

        pilots = entry['PilotList']
        killResult = computingElement.killJob( pilots )
        if not killResult['OK']:
            # Remember the whole group and carry on with the other queues
            notKilled.extend( pilots )

    if notKilled:
        return S_ERROR('Failed to kill at least some pilots')
    return S_OK()
def getPilotCE(pilotDict):
    """Build the computing element a pilot is bound to.

    The queue description is looked up from the pilot's "GridSite",
    "DestinationSite" and "Queue" entries, then completed with the grid
    environment and a freshly created temporary working directory before
    being handed to the CE factory.

    :param dict pilotDict: pilot description; the keys "GridSite",
        "DestinationSite", "Queue" and "GridType" are read
    :return: S_OK(ce) on success, otherwise the failing call's result. The
        temporary working directory is removed when the CE cannot be
        instantiated and left in place on success.
    """
    factory = ComputingElementFactory()

    queueResult = getQueue(pilotDict["GridSite"], pilotDict["DestinationSite"], pilotDict["Queue"])
    if not queueResult["OK"]:
        return queueResult

    ceParameters = queueResult["Value"]
    ceParameters["GridEnv"] = getGridEnv()
    ceParameters["WorkingDirectory"] = mkdtemp()

    ceResult = factory.getCE(pilotDict["GridType"], pilotDict["DestinationSite"], ceParameters)
    if ceResult["OK"]:
        return S_OK(ceResult["Value"])

    # No CE will use the directory, so do not leave it behind
    shutil.rmtree(ceParameters["WorkingDirectory"])
    return ceResult
def killPilotsInQueues(pilotRefDict):
    """Kill pilots one queue at a time.

    :param dict pilotRefDict: maps "owner@@@group@@@site@@@ce@@@queue" keys to
        dicts holding the "GridType" and the "PilotList" of that queue
    :return: list of the pilot references whose killJob call failed (empty
        when all calls succeeded). NOTE: a failed queue lookup, CE
        instantiation or proxy retrieval returns an error structure (a dict)
        instead of a list.
    """
    factory = ComputingElementFactory()
    notKilled = []

    for queueKey, queueInfo in pilotRefDict.items():
        ownerDN, ownerGroup, site, ceName, queueName = queueKey.split("@@@")

        queueResult = getQueue(site, ceName, queueName)
        if not queueResult["OK"]:
            return queueResult
        queueDict = queueResult["Value"]
        gridType = queueInfo["GridType"]

        ceResult = factory.getCE(gridType, ceName, queueDict)
        if not ceResult["OK"]:
            return ceResult
        computingElement = ceResult["Value"]

        # FIXME: quite hacky. Should be either removed, or based on some flag
        if gridType in ("CREAM", "ARC", "Globus", "HTCondorCE"):
            vomsGroup = getGroupOption(ownerGroup, "VOMSRole", ownerGroup)
            proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(ownerDN, vomsGroup)
            if not proxyResult["OK"]:
                gLogger.error(
                    "Could not get proxy:",
                    'User "%s" Group "%s" : %s' % (ownerDN, vomsGroup, proxyResult["Message"]),
                )
                return S_ERROR("Failed to get the pilot's owner proxy")
            computingElement.setProxy(proxyResult["Value"])

        pilots = queueInfo["PilotList"]
        killResult = computingElement.killJob(pilots)
        if not killResult["OK"]:
            # Record the whole queue as failed and continue with the others
            notKilled.extend(pilots)

    return notKilled
def __getGridJobOutput(self,pilotReference):
    """ Get the pilot job standard output and standard error files for the Grid
        job reference

        The output already stored in the pilot DB is returned when it is not
        empty. Otherwise it is fetched either with getPilotOutput (grid types
        LCG, gLite and CREAM) or through a computing element built for the
        pilot's queue, and stored in the pilot DB when the standard output is
        not empty.

        Returns S_OK with a dict holding 'StdOut', 'StdErr', 'OwnerDN',
        'OwnerGroup' and 'FileList', or an error structure.
    """
    infoResult = pilotDB.getPilotInfo( pilotReference )
    if not ( infoResult['OK'] and infoResult['Value'] ):
        return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotInfo = infoResult['Value'][pilotReference]
    ownerDN = pilotInfo['OwnerDN']
    ownerGroup = pilotInfo['OwnerGroup']

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    cached = pilotDB.getPilotOutput( pilotReference )
    if cached['OK']:
        stdout = cached['Value']['StdOut']
        error = cached['Value']['StdErr']
        if stdout or error:
            return S_OK( { 'StdOut': stdout,
                           'StdErr': error,
                           'OwnerDN': ownerDN,
                           'OwnerGroup': ownerGroup,
                           'FileList': [] } )
        gLogger.warn( 'Empty pilot output found for %s' % pilotReference )

    gridType = pilotInfo['GridType']

    if gridType not in ( "LCG", "gLite", "CREAM" ):
        # Instantiate the appropriate CE
        ceFactory = ComputingElementFactory()
        queueResult = getQueue( pilotInfo['GridSite'], pilotInfo['DestinationSite'], pilotInfo['Queue'] )
        if not queueResult['OK']:
            return queueResult
        ceResult = ceFactory.getCE( gridType, pilotInfo['DestinationSite'], queueResult['Value'] )
        if not ceResult['OK']:
            return ceResult
        computingElement = ceResult['Value']

        # The CE is addressed with "<reference>:::<stamp>" when a stamp exists
        pilotStamp = pilotInfo['PilotStamp']
        jobReference = pilotReference
        if pilotStamp:
            jobReference = jobReference + ':::' + pilotStamp

        outputResult = computingElement.getJobOutput( jobReference )
        if not outputResult['OK']:
            return outputResult
        stdout, error = outputResult['Value']
        if stdout:
            storeResult = pilotDB.storePilotOutput( pilotReference, stdout, error )
            if not storeResult['OK']:
                # A failed store is only logged; the output is still returned
                gLogger.error('Failed to store pilot output:',storeResult['Message'])

        return S_OK( { 'StdOut': stdout,
                       'StdErr': error,
                       'OwnerDN': ownerDN,
                       'OwnerGroup': ownerGroup,
                       'FileList': [] } )

    # LCG, gLite and CREAM: fetch the output with the pilot owner's proxy
    vomsGroup = getGroupOption( ownerGroup, 'VOMSRole', ownerGroup )
    proxyResult = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, vomsGroup )
    if not proxyResult['OK']:
        gLogger.error( proxyResult['Message'] )
        gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, vomsGroup ) )
        return S_ERROR("Failed to get the pilot's owner proxy")
    proxy = proxyResult['Value']

    pilotStamp = pilotInfo['PilotStamp']
    outputResult = getPilotOutput( proxy, gridType, pilotReference, pilotStamp )
    if not outputResult['OK']:
        return S_ERROR('Failed to get pilot output: '+outputResult['Message'])
    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    stdout = outputResult['StdOut']
    error = outputResult['StdErr']
    fileList = outputResult['FileList']
    if stdout:
        storeResult = pilotDB.storePilotOutput( pilotReference, stdout, error )
        if not storeResult['OK']:
            gLogger.error('Failed to store pilot output:',storeResult['Message'])

    # On this path 'OwnerGroup' carries the value returned by getGroupOption
    return S_OK( { 'StdOut': stdout,
                   'StdErr': error,
                   'OwnerDN': ownerDN,
                   'OwnerGroup': vomsGroup,
                   'FileList': fileList } )
def export_killPilot(self, pilotRefList):
    """ Kill the specified pilots

        pilotRefList may be one pilot reference given as a string, or an
        iterable of references. Pilots sharing owner, group, grid site,
        destination site and queue are killed together through one computing
        element.

        Returns S_OK() if all killJob calls succeeded and S_ERROR if at least
        one of them failed. Any failure while collecting pilot info, resolving
        a queue, creating a CE or fetching a proxy ends the call at once with
        an error structure.
    """
    # Make a list if it is not yet
    pilotRefs = [pilotRefList] if type(pilotRefList) in StringTypes else list(pilotRefList)

    # Regroup pilots per site and per owner
    pilotsPerQueue = {}
    for pilotRef in pilotRefs:
        lookup = pilotDB.getPilotInfo(pilotRef)
        if not lookup['OK'] or not lookup['Value']:
            return S_ERROR('Failed to get info for pilot ' + pilotRef)

        details = lookup['Value'][pilotRef]
        keyParts = [
            details['OwnerDN'],
            details['OwnerGroup'],
            details['GridSite'],
            details['DestinationSite'],
            details['Queue'],
        ]
        queueKey = '@@@'.join(keyParts)
        flavour = details['GridType']

        bucket = pilotsPerQueue.setdefault(queueKey, {})
        bucket.setdefault('PilotList', []).append(pilotRef)
        bucket['GridType'] = flavour

    # Do the work now queue by queue
    factory = ComputingElementFactory()
    failedPilots = []
    for queueKey, bucket in pilotsPerQueue.items():
        owner, group, site, ceName, queueName = queueKey.split('@@@')

        res = getQueue(site, ceName, queueName)
        if not res['OK']:
            return res
        queueDict = res['Value']
        flavour = bucket['GridType']

        res = factory.getCE(flavour, ceName, queueDict)
        if not res['OK']:
            return res
        ceObject = res['Value']

        if flavour in ["LCG", "gLite", "CREAM"]:
            # These flavours need the pilot owner's proxy set on the CE
            group = getGroupOption(group, 'VOMSRole', group)
            proxyRes = gProxyManager.getPilotProxyFromVOMSGroup(owner, group)
            if not proxyRes['OK']:
                gLogger.error(proxyRes['Message'])
                gLogger.error('Could not get proxy:', 'User "%s", Group "%s"' % (owner, group))
                return S_ERROR("Failed to get the pilot's owner proxy")
            ceObject.setProxy(proxyRes['Value'])

        queuePilots = bucket['PilotList']
        if not ceObject.killJob(queuePilots)['OK']:
            # Keep going: the remaining queues are still processed
            failedPilots.extend(queuePilots)

    if failedPilots:
        return S_ERROR('Failed to kill at least some pilots')

    return S_OK()
def __getGridJobOutput(self, pilotReference):
    """ Get the pilot job standard output and standard error files for the Grid
        job reference

        Non-empty output already held by the pilot DB is returned directly.
        Otherwise the output is retrieved with getPilotOutput for the LCG,
        gLite and CREAM grid types, or from a computing element for any other
        grid type. It is written back to the pilot DB when the standard output
        is not empty.

        Returns S_OK with a dict of 'StdOut', 'StdErr', 'OwnerDN', 'OwnerGroup'
        and 'FileList', or an error structure.
    """
    res = pilotDB.getPilotInfo(pilotReference)
    if not res['OK'] or not res['Value']:
        return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotDict = res['Value'][pilotReference]
    owner = pilotDict['OwnerDN']
    group = pilotDict['OwnerGroup']

    def packOutput(stdOut, stdErr, groupName, files):
        # Single place building the S_OK payload returned by every success path
        return S_OK({
            'StdOut': stdOut,
            'StdErr': stdErr,
            'OwnerDN': owner,
            'OwnerGroup': groupName,
            'FileList': files,
        })

    def remember(stdOut, stdErr):
        # Store the output in the pilot DB; a failure is logged, never returned
        stored = pilotDB.storePilotOutput(pilotReference, stdOut, stdErr)
        if not stored['OK']:
            gLogger.error('Failed to store pilot output:', stored['Message'])

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    res = pilotDB.getPilotOutput(pilotReference)
    if res['OK']:
        stdout = res['Value']['StdOut']
        error = res['Value']['StdErr']
        if stdout or error:
            return packOutput(stdout, error, group, [])
        else:
            gLogger.warn('Empty pilot output found for %s' % pilotReference)

    gridType = pilotDict['GridType']
    if gridType in ["LCG", "gLite", "CREAM"]:
        # From here on "group" is the value returned by getGroupOption, and
        # that is also what ends up in 'OwnerGroup' on this path
        group = getGroupOption(group, 'VOMSRole', group)
        proxyRes = gProxyManager.getPilotProxyFromVOMSGroup(owner, group)
        if not proxyRes['OK']:
            gLogger.error(proxyRes['Message'])
            gLogger.error('Could not get proxy:', 'User "%s", Group "%s"' % (owner, group))
            return S_ERROR("Failed to get the pilot's owner proxy")
        proxy = proxyRes['Value']

        pilotStamp = pilotDict['PilotStamp']
        res = getPilotOutput(proxy, gridType, pilotReference, pilotStamp)
        if not res['OK']:
            return S_ERROR('Failed to get pilot output: ' + res['Message'])
        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        stdout = res['StdOut']
        error = res['StdErr']
        fileList = res['FileList']
        if stdout:
            remember(stdout, error)

        return packOutput(stdout, error, group, fileList)
    else:
        # Instantiate the appropriate CE
        ceFactory = ComputingElementFactory()
        res = getQueue(pilotDict['GridSite'], pilotDict['DestinationSite'], pilotDict['Queue'])
        if not res['OK']:
            return res
        queueDict = res['Value']
        res = ceFactory.getCE(gridType, pilotDict['DestinationSite'], queueDict)
        if not res['OK']:
            return res
        ce = res['Value']

        # Append the stamp to the reference when the pilot has one
        pilotStamp = pilotDict['PilotStamp']
        if pilotStamp:
            pRef = pilotReference + ':::' + pilotStamp
        else:
            pRef = pilotReference

        res = ce.getJobOutput(pRef)
        if not res['OK']:
            return res
        stdout, error = res['Value']
        if stdout:
            remember(stdout, error)

        return packOutput(stdout, error, group, [])
def getGridJobOutput(pilotReference):
    """ Get the pilot job standard output and standard error files for the Grid job reference

        Non-empty output already stored in the pilot agents DB is returned
        directly. Otherwise a computing element is built for the pilot's
        queue, the pilot owner's proxy is set on it, the output is retrieved
        through the CE and, when the standard output is not empty, stored in
        the DB.

        The CE gets a temporary working directory. It is removed on every way
        out of the function once it has been created, including the proxy
        failure path and unexpected exceptions.

        :param str pilotReference: a grid (job) pilot reference
        :return: S_OK(dict) with the keys 'StdOut', 'StdErr', 'OwnerDN',
                 'OwnerGroup' and 'FileList', or an error structure
    """

    result = pilotAgentsDB.getPilotInfo(pilotReference)
    if not result['OK'] or not result['Value']:
        return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotDict = result['Value'][pilotReference]
    owner = pilotDict['OwnerDN']
    group = pilotDict['OwnerGroup']

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    result = pilotAgentsDB.getPilotOutput(pilotReference)
    if result['OK']:
        stdout = result['Value']['StdOut']
        error = result['Value']['StdErr']
        if stdout or error:
            resultDict = {}
            resultDict['StdOut'] = stdout
            resultDict['StdErr'] = error
            resultDict['OwnerDN'] = owner
            resultDict['OwnerGroup'] = group
            resultDict['FileList'] = []
            return S_OK(resultDict)
        else:
            gLogger.warn('Empty pilot output found', 'for %s' % pilotReference)

    # Instantiate the appropriate CE
    ceFactory = ComputingElementFactory()
    result = getQueue(pilotDict['GridSite'], pilotDict['DestinationSite'], pilotDict['Queue'])
    if not result['OK']:
        return result
    queueDict = result['Value']
    gridEnv = getGridEnv()
    queueDict['GridEnv'] = gridEnv
    workingDirectory = mkdtemp()
    queueDict['WorkingDirectory'] = workingDirectory

    # Everything below runs with the temporary directory in existence; the
    # finally clause guarantees it never outlives this call.
    try:
        result = ceFactory.getCE(pilotDict['GridType'], pilotDict['DestinationSite'], queueDict)
        if not result['OK']:
            return result
        ce = result['Value']

        # The proxy is requested for the VOMS role, but the DIRAC group is
        # what gets reported back in 'OwnerGroup'
        groupVOMS = getGroupOption(group, 'VOMSRole', group)
        result = gProxyManager.getPilotProxyFromVOMSGroup(owner, groupVOMS)
        if not result['OK']:
            gLogger.error(
                'Could not get proxy:',
                'User "%s" Group "%s" : %s' % (owner, groupVOMS, result['Message']))
            return S_ERROR("Failed to get the pilot's owner proxy")
        proxy = result['Value']
        ce.setProxy(proxy)

        # The CE is addressed with "<reference>:::<stamp>" when a stamp exists
        pilotStamp = pilotDict['PilotStamp']
        pRef = pilotReference
        if pilotStamp:
            pRef = pRef + ':::' + pilotStamp
        result = ce.getJobOutput(pRef)
        if not result['OK']:
            return result
        stdout, error = result['Value']
        if stdout:
            result = pilotAgentsDB.storePilotOutput(pilotReference, stdout, error)
            if not result['OK']:
                # A failed store is only logged; the output is still returned
                gLogger.error('Failed to store pilot output:', result['Message'])

        resultDict = {}
        resultDict['StdOut'] = stdout
        resultDict['StdErr'] = error
        resultDict['OwnerDN'] = owner
        resultDict['OwnerGroup'] = group
        resultDict['FileList'] = []
        return S_OK(resultDict)
    finally:
        shutil.rmtree(workingDirectory)