def findGenericPilotCredentials(vo=False, group=False):
    """Find the (DN, group) pair to be used as generic pilot credentials.

    If both "Pilot/GenericPilotDN" and "Pilot/GenericPilotGroup" are set in the
    Operations section of the VO they are used directly. Otherwise the groups
    carrying the GENERIC_PILOT property (restricted to the groups of the VO)
    are scanned for a DN that has a proxy in the ProxyManager.

    :param vo: VO name; when false it is derived from ``group``
    :param group: group name, only used to look up the VO when ``vo`` is not given
    :return: S_OK( ( pilotDN, pilotGroup ) ) or S_ERROR / the failed Registry result
    """
    if not group and not vo:
        return S_ERROR("Need a group or a VO to determine the Generic pilot credentials")
    if not vo:
        vo = Registry.getVOForGroup(group)
        if not vo:
            return S_ERROR("Group %s does not have a VO associated" % group)

    opsHelper = Operations.Operations(vo=vo)
    pilotGroup = opsHelper.getValue("Pilot/GenericPilotGroup", "")
    pilotDN = opsHelper.getValue("Pilot/GenericPilotDN", "")
    if pilotDN and pilotGroup:
        gLogger.verbose("Pilot credentials have been defined in the CS. Using %s@%s" % (pilotDN, pilotGroup))
        result = gProxyManager.userHasProxy(pilotDN, pilotGroup, 86400)
        # NOTE(review): only result['OK'] is checked here, not result['Value'] as in
        # the autodiscovery loop below - confirm whether that is intended.
        if not result['OK']:
            # The message used to be returned with its %s placeholders unfilled
            return S_ERROR("%s@%s has no proxy uploaded to the ProxyManager" % (pilotDN, pilotGroup))
        return S_OK((pilotDN, pilotGroup))

    # Auto discover
    gLogger.verbose("Pilot credentials are not defined. Autodiscovering...")
    if pilotGroup:
        pilotGroups = [pilotGroup]
    else:
        result = Registry.getGroupsWithProperty(Properties.GENERIC_PILOT)
        if not result['OK']:
            return result
        groups = result['Value']
        if not groups:
            return S_ERROR("No group with %s property defined" % Properties.GENERIC_PILOT)
        result = Registry.getGroupsForVO(vo)
        if not result['OK']:
            return result
        # Keep only the generic pilot groups that belong to the requested VO
        pilotGroups = [voGroup for voGroup in result['Value'] if voGroup in groups]
    if not pilotGroups:
        return S_ERROR("No group for VO %s is a generic pilot group" % vo)

    for pilotGroup in pilotGroups:
        DNs = Registry.getDNsInGroup(pilotGroup)
        if not DNs:
            continue
        if pilotDN:
            # A DN was configured: it is the only candidate for this group
            if pilotDN not in DNs:
                continue
            result = gProxyManager.userHasProxy(pilotDN, pilotGroup, 86400)
            if result['OK'] and result['Value']:
                gLogger.verbose("Discovered pilot credentials are %s@%s" % (pilotDN, pilotGroup))
                return S_OK((pilotDN, pilotGroup))
        else:
            # No DN configured: take the first DN of the group with a proxy
            for DN in DNs:
                result = gProxyManager.userHasProxy(DN, pilotGroup, 86400)
                if result['OK'] and result['Value']:
                    gLogger.verbose("Discovered pilot credentials are %s@%s" % (DN, pilotGroup))
                    return S_OK((DN, pilotGroup))

    if pilotDN:
        return S_ERROR("DN %s does not have group %s" % (pilotDN, pilotGroups))
    return S_ERROR("No generic proxy in the Proxy Manager with groups %s" % pilotGroups)
def findGenericPilotCredentials(vo=False, group=False):
    """Find the (DN, group) pair to be used as generic pilot credentials.

    If both "Pilot/GenericPilotDN" and "Pilot/GenericPilotGroup" are set in the
    Operations section of the VO they are used directly. Otherwise the groups
    carrying the GENERIC_PILOT property (restricted to the groups of the VO)
    are scanned for a DN that has a proxy in the ProxyManager.

    :param vo: VO name; when false it is derived from ``group``
    :param group: group name, only used to look up the VO when ``vo`` is not given
    :return: S_OK( ( pilotDN, pilotGroup ) ) or S_ERROR / the failed Registry result
    """
    if not group and not vo:
        return S_ERROR("Need a group or a VO to determine the Generic pilot credentials")
    if not vo:
        vo = Registry.getVOForGroup(group)
        if not vo:
            return S_ERROR("Group %s does not have a VO associated" % group)

    opsHelper = Operations.Operations(vo=vo)
    pilotGroup = opsHelper.getValue("Pilot/GenericPilotGroup", "")
    pilotDN = opsHelper.getValue("Pilot/GenericPilotDN", "")
    if pilotDN and pilotGroup:
        gLogger.verbose("Pilot credentials from CS: %s@%s" % (pilotDN, pilotGroup))
        result = gProxyManager.userHasProxy(pilotDN, pilotGroup, 86400)
        # NOTE(review): only result['OK'] is checked here, not result['Value'] as in
        # the autodiscovery loop below - confirm whether that is intended.
        if not result['OK']:
            # The message used to be returned with its %s placeholders unfilled
            return S_ERROR("%s@%s has no proxy in ProxyManager" % (pilotDN, pilotGroup))
        return S_OK((pilotDN, pilotGroup))

    # Auto discover
    gLogger.verbose("Pilot credentials are not defined. Autodiscovering...")
    if pilotGroup:
        pilotGroups = [pilotGroup]
    else:
        result = Registry.getGroupsWithProperty(Properties.GENERIC_PILOT)
        if not result['OK']:
            return result
        groups = result['Value']
        if not groups:
            return S_ERROR("No group with %s property defined" % Properties.GENERIC_PILOT)
        result = Registry.getGroupsForVO(vo)
        if not result['OK']:
            return result
        # Keep only the generic pilot groups that belong to the requested VO
        pilotGroups = [voGroup for voGroup in result['Value'] if voGroup in groups]
    if not pilotGroups:
        return S_ERROR("No generic pilot group for VO %s" % vo)

    for pilotGroup in pilotGroups:
        DNs = Registry.getDNsInGroup(pilotGroup)
        if not DNs:
            continue
        if pilotDN:
            # A DN was configured: it is the only candidate for this group
            if pilotDN not in DNs:
                continue
            result = gProxyManager.userHasProxy(pilotDN, pilotGroup, 86400)
            if result['OK'] and result['Value']:
                gLogger.verbose("Discovered pilot credentials: %s@%s" % (pilotDN, pilotGroup))
                return S_OK((pilotDN, pilotGroup))
        else:
            # No DN configured: take the first DN of the group with a proxy
            for DN in DNs:
                result = gProxyManager.userHasProxy(DN, pilotGroup, 86400)
                if result['OK'] and result['Value']:
                    gLogger.verbose("Discovered pilot credentials: %s@%s" % (DN, pilotGroup))
                    return S_OK((DN, pilotGroup))

    if pilotDN:
        return S_ERROR("DN %s does not have group %s" % (pilotDN, pilotGroups))
    return S_ERROR("No generic proxy in the Proxy Manager with groups %s" % pilotGroups)
def mapVoToGroups(voname):
    """Return the set of groups the Registry reports for the VO ``voname``.

    :param voname: name of the VO
    :return: set built from the 'Value' of Registry.getGroupsForVO
    :raises RuntimeError: if the Registry lookup does not return OK
    """
    lookup = Registry.getGroupsForVO(voname)
    if lookup["OK"]:
        return set(lookup["Value"])
    raise RuntimeError("Could not retrieve groups for vo %s." % voname)
def mapVoToGroups(voname):
    """Return the set of groups the Registry reports for the VO ``voname``.

    :param voname: name of the VO
    :return: set built from the 'Value' of Registry.getGroupsForVO
    :raises RuntimeError: if the Registry lookup does not return OK
    """
    lookup = Registry.getGroupsForVO(voname)
    if lookup['OK']:
        return set(lookup['Value'])
    raise RuntimeError('Could not retrieve groups for vo %s.' % voname)
def _checkCredentials(self, resourceDict, credDict):
    """Constrain the resource description according to the caller credentials.

    ``resourceDict`` is updated in place ('OwnerGroup' and/or 'OwnerDN') and
    returned.

    :param resourceDict: resource description dictionary
    :param credDict: credentials dictionary; 'properties', 'group' and 'DN' are read
    :return: the updated resourceDict
    :raises RuntimeError: if a Registry lookup does not return OK
    """
    properties = credDict['properties']

    if Properties.GENERIC_PILOT in properties:
        # You can only match groups in the same VO
        if credDict['group'] == "hosts":
            # for the host case the VirtualOrganization parameter
            # is mandatory in resourceDict
            vo = resourceDict.get('VirtualOrganization', '')
        else:
            vo = Registry.getVOForGroup(credDict['group'])
        if 'OwnerGroup' not in resourceDict:
            voGroups = Registry.getGroupsForVO(vo)
            if not voGroups['OK']:
                raise RuntimeError(voGroups['Message'])
            resourceDict['OwnerGroup'] = voGroups['Value']
        return resourceDict

    if Properties.PILOT in properties:
        # Private pilot: the DN has to be the same
        self.log.notice("Setting the resource DN to the credentials DN")
        resourceDict['OwnerDN'] = credDict['DN']
        return resourceDict

    if Properties.JOB_SHARING in properties:
        # Job sharing: the group has to be the same, and the requested DN
        # (if any) must belong to that group
        resourceDict['OwnerGroup'] = credDict['group']
        self.log.notice("Setting the resource group to the credentials group")
        if 'OwnerDN' not in resourceDict or resourceDict['OwnerDN'] == credDict['DN']:
            return resourceDict
        ownerDN = resourceDict['OwnerDN']
        dnGroups = Registry.getGroupsForDN(ownerDN)
        if not dnGroups['OK']:
            raise RuntimeError(dnGroups['Message'])
        if credDict['group'] not in dnGroups['Value']:
            # The requested DN is not in the caller's group: fall back to the caller DN
            self.log.warn("You cannot request jobs from this DN, as it does not belong to your group!",
                          "(%s)" % ownerDN)
            resourceDict['OwnerDN'] = credDict['DN']
        return resourceDict

    # Nothing special, group and DN have to be the same
    resourceDict['OwnerDN'] = credDict['DN']
    resourceDict['OwnerGroup'] = credDict['group']
    return resourceDict
def _checkCredentials(self, resourceDict, credDict):
    """Constrain the resource description according to the caller credentials.

    ``resourceDict`` is updated in place ('OwnerGroup' and/or 'OwnerDN') and
    returned.

    :param resourceDict: resource description dictionary
    :param credDict: credentials dictionary; 'properties', 'group' and 'DN' are read
    :return: the updated resourceDict
    :raises RuntimeError: if a Registry lookup does not return OK
    """
    properties = credDict['properties']

    if Properties.GENERIC_PILOT in properties:
        # You can only match groups in the same VO
        if credDict['group'] == "hosts":
            # for the host case the VirtualOrganization parameter
            # is mandatory in resourceDict
            vo = resourceDict.get('VirtualOrganization', '')
        else:
            vo = Registry.getVOForGroup(credDict['group'])
        if 'OwnerGroup' not in resourceDict:
            voGroups = Registry.getGroupsForVO(vo)
            if not voGroups['OK']:
                raise RuntimeError(voGroups['Message'])
            resourceDict['OwnerGroup'] = voGroups['Value']
        return resourceDict

    if Properties.PILOT in properties:
        # Private pilot: the DN has to be the same
        self.log.notice("Setting the resource DN to the credentials DN")
        resourceDict['OwnerDN'] = credDict['DN']
        return resourceDict

    if Properties.JOB_SHARING in properties:
        # Job sharing: the group has to be the same, and the requested DN
        # (if any) must belong to that group
        resourceDict['OwnerGroup'] = credDict['group']
        self.log.notice("Setting the resource group to the credentials group")
        if 'OwnerDN' not in resourceDict or resourceDict['OwnerDN'] == credDict['DN']:
            return resourceDict
        ownerDN = resourceDict['OwnerDN']
        dnGroups = Registry.getGroupsForDN(ownerDN)
        if not dnGroups['OK']:
            raise RuntimeError(dnGroups['Message'])
        if credDict['group'] not in dnGroups['Value']:
            # The requested DN is not in the caller's group: fall back to the caller DN
            self.log.notice("You cannot request jobs from DN %s. It does not belong to your group!" % ownerDN)
            resourceDict['OwnerDN'] = credDict['DN']
        return resourceDict

    # Nothing special, group and DN have to be the same
    resourceDict['OwnerDN'] = credDict['DN']
    resourceDict['OwnerGroup'] = credDict['group']
    return resourceDict
def getValidGroups(self, rawProperties):
    """Collect the groups named by the method authorization rules.

    Entries starting with 'group:' or 'vo:' are removed from ``rawProperties``
    (the list is modified in place). A 'group:' entry contributes the group
    itself; a 'vo:' entry contributes the groups the Registry returns for that
    VO, and is ignored if the lookup does not return OK.

    :param rawProperties: all method properties
    :type rawProperties: python:list
    :return: list of allowed groups without duplicates, or []
    """
    collected = []
    # Iterate over a snapshot because entries are removed while scanning
    for entry in list(rawProperties):
        if entry.startswith('group:'):
            rawProperties.remove(entry)
            collected.append(entry.replace('group:', ''))
            continue
        if not entry.startswith('vo:'):
            continue
        rawProperties.remove(entry)
        voName = entry.replace('vo:', '')
        lookup = Registry.getGroupsForVO(voName)
        if lookup['OK']:
            collected.extend(lookup['Value'])

    return list(set(collected))
def _checkCredentials(self, resourceDict, credDict):
    """Constrain the resource description according to the caller credentials.

    ``resourceDict`` is updated in place ('OwnerGroup' and/or 'OwnerDN') and
    returned.

    :param resourceDict: resource description dictionary
    :param credDict: credentials dictionary; 'properties', 'group' and 'DN' are read
    :return: the updated resourceDict
    :raises RuntimeError: if a Registry lookup does not return OK
    """
    properties = credDict['properties']

    if Properties.GENERIC_PILOT in properties:
        # Generic pilot: you can only match groups in the same VO
        vo = Registry.getVOForGroup(credDict['group'])
        voGroups = Registry.getGroupsForVO(vo)
        if not voGroups['OK']:
            raise RuntimeError(voGroups['Message'])
        resourceDict['OwnerGroup'] = voGroups['Value']
        return resourceDict

    if Properties.PILOT in properties:
        # Private pilot: the DN has to be the same
        self.log.notice("Setting the resource DN to the credentials DN")
        resourceDict['OwnerDN'] = credDict['DN']
        return resourceDict

    if Properties.JOB_SHARING in properties:
        # Job sharing: the group has to be the same, and the requested DN
        # (if any) must belong to that group
        resourceDict['OwnerGroup'] = credDict['group']
        self.log.notice("Setting the resource group to the credentials group")
        if 'OwnerDN' not in resourceDict or resourceDict['OwnerDN'] == credDict['DN']:
            return resourceDict
        ownerDN = resourceDict['OwnerDN']
        dnGroups = Registry.getGroupsForDN(ownerDN)
        if not dnGroups['OK']:
            raise RuntimeError(dnGroups['Message'])
        if credDict['group'] not in dnGroups['Value']:
            # The requested DN is not in the caller's group: fall back to the caller DN
            self.log.notice("You cannot request jobs from DN %s. It does not belong to your group!" % ownerDN)
            resourceDict['OwnerDN'] = credDict['DN']
        return resourceDict

    # Nothing special, group and DN have to be the same
    resourceDict['OwnerDN'] = credDict['DN']
    resourceDict['OwnerGroup'] = credDict['group']
    return resourceDict
def beginExecution(self):
    """Read the agent options and prepare the state used for pilot submission.

    Sets the VO, the eligible user groups, the pilot credentials, the pilot
    submission parameters and the queues to serve, then logs the settings.

    :return: S_OK() or the first failed result of a called helper
    """
    self.gridEnv = self.am_getOption("GridEnv", getGridEnv())

    # The SiteDirector is for a particular user community: the "VO" option
    # first, then "Community", then the global VO
    self.vo = (self.am_getOption("VO", '')
               or self.am_getOption("Community", '')
               or CSGlobals.getVO())
    # The SiteDirector is for a particular user group
    self.group = self.am_getOption("Group", '')
    # self.voGroups contains all the eligible user groups for pilots submitted by this SiteDirector
    self.voGroups = []

    # Choose the group for which pilots will be submitted. This is a hack until
    # we will be able to match pilots to VOs.
    if self.group:
        self.voGroups = [self.group]
    elif self.vo:
        result = Registry.getGroupsForVO(self.vo)
        if not result['OK']:
            return result
        self.voGroups = [voGroup for voGroup in result['Value']
                         if 'NormalUser' in Registry.getPropertiesForGroup(voGroup)]

    result = findGenericPilotCredentials(vo=self.vo)
    if not result['OK']:
        return result
    self.pilotDN, self.pilotGroup = result['Value']
    # Agent options override the discovered credentials
    self.pilotDN = self.am_getOption("PilotDN", self.pilotDN)
    self.pilotGroup = self.am_getOption("PilotGroup", self.pilotGroup)

    self.platforms = []
    self.sites = []
    if self.group:
        self.defaultSubmitPools = Registry.getGroupOption(self.group, 'SubmitPools', '')
    elif self.vo:
        self.defaultSubmitPools = Registry.getVOOption(self.vo, 'SubmitPools', '')
    else:
        self.defaultSubmitPools = ''

    self.pilot = self.am_getOption('PilotScript', DIRAC_PILOT)
    self.install = DIRAC_INSTALL
    self.extraModules = self.am_getOption('ExtraPilotModules', []) + DIRAC_MODULES
    self.workingDirectory = self.am_getOption('WorkDirectory')
    self.maxQueueLength = self.am_getOption('MaxQueueLength', 86400 * 3)
    self.pilotLogLevel = self.am_getOption('PilotLogLevel', 'INFO')
    self.maxJobsInFillMode = self.am_getOption('MaxJobsInFillMode', self.maxJobsInFillMode)
    self.maxPilotsToSubmit = self.am_getOption('MaxPilotsToSubmit', self.maxPilotsToSubmit)
    self.pilotWaitingFlag = self.am_getOption('PilotWaitingFlag', True)
    self.pilotWaitingTime = self.am_getOption('MaxPilotWaitingTime', 3600)
    self.failedQueueCycleFactor = self.am_getOption('FailedQueueCycleFactor', 10)
    self.pilotStatusUpdateCycleFactor = self.am_getOption('PilotStatusUpdateCycleFactor', 10)

    # Flags
    self.updateStatus = self.am_getOption('UpdatePilotStatus', True)
    self.getOutput = self.am_getOption('GetPilotOutput', True)
    self.sendAccounting = self.am_getOption('SendPilotAccounting', True)

    # Get the site description dictionary; "Any" (in any case) means no restriction
    siteNames = None
    if self.am_getOption('Site', 'Any').lower() != "any":
        siteNames = self.am_getOption('Site', []) or None
    ceTypes = None
    if self.am_getOption('CETypes', 'Any').lower() != "any":
        ceTypes = self.am_getOption('CETypes', [])
    ces = None
    if self.am_getOption('CEs', 'Any').lower() != "any":
        ces = self.am_getOption('CEs', []) or None

    result = Resources.getQueues(community=self.vo,
                                 siteList=siteNames,
                                 ceList=ces,
                                 ceTypeList=ceTypes,
                                 mode='Direct')
    if not result['OK']:
        return result
    resourceDict = result['Value']
    result = self.getQueues(resourceDict)
    if not result['OK']:
        return result

    for enabled, notice in ((self.updateStatus, 'Pilot status update requested'),
                            (self.getOutput, 'Pilot output retrieval requested'),
                            (self.sendAccounting, 'Pilot accounting sending requested')):
        if enabled:
            self.log.always(notice)

    for label, value in (('Sites:', siteNames),
                         ('CETypes:', ceTypes),
                         ('CEs:', ces),
                         ('PilotDN:', self.pilotDN),
                         ('PilotGroup:', self.pilotGroup),
                         ('MaxPilotsToSubmit:', self.maxPilotsToSubmit),
                         ('MaxJobsInFillMode:', self.maxJobsInFillMode)):
        self.log.always(label, value)

    self.localhost = socket.getfqdn()
    self.proxy = ''

    # The list of served queues is only logged on the first pass
    if self.firstPass and self.queueDict:
        self.log.always("Agent will serve queues:")
        for queue in self.queueDict:
            queueInfo = self.queueDict[queue]
            self.log.always("Site: %s, CE: %s, Queue: %s" % (queueInfo['Site'],
                                                            queueInfo['CEName'],
                                                            queue))
    self.firstPass = False
    return S_OK()
def beginExecution(self):
    """Read the agent options and prepare the state used for pilot submission.

    Sets the VO, the eligible user groups, the pilot credentials, the pilot
    submission parameters and the queues to serve, then logs the settings.

    :return: S_OK() or the first failed result of a called helper
    """
    self.gridEnv = self.am_getOption("GridEnv", getGridEnv())

    # The SiteDirector is for a particular user community
    self.vo = self.am_getOption("Community", '') or CSGlobals.getVO()
    # The SiteDirector is for a particular user group
    self.group = self.am_getOption("Group", '')
    # self.voGroups contains all the eligible user groups for pilots submitted by this SiteDirector
    self.voGroups = []

    # Choose the group for which pilots will be submitted. This is a hack until
    # we will be able to match pilots to VOs.
    if self.group:
        self.voGroups = [self.group]
    elif self.vo:
        result = Registry.getGroupsForVO(self.vo)
        if not result['OK']:
            return result
        self.voGroups = [voGroup for voGroup in result['Value']
                         if 'NormalUser' in Registry.getPropertiesForGroup(voGroup)]

    result = findGenericPilotCredentials(vo=self.vo)
    if not result['OK']:
        return result
    self.pilotDN, self.pilotGroup = result['Value']
    # Agent options override the discovered credentials
    self.pilotDN = self.am_getOption("PilotDN", self.pilotDN)
    self.pilotGroup = self.am_getOption("PilotGroup", self.pilotGroup)

    self.platforms = []
    self.sites = []
    if self.group:
        self.defaultSubmitPools = Registry.getGroupOption(self.group, 'SubmitPools', '')
    elif self.vo:
        self.defaultSubmitPools = Registry.getVOOption(self.vo, 'SubmitPools', '')
    else:
        self.defaultSubmitPools = ''

    self.pilot = self.am_getOption('PilotScript', DIRAC_PILOT)
    self.install = DIRAC_INSTALL
    self.workingDirectory = self.am_getOption('WorkDirectory')
    self.maxQueueLength = self.am_getOption('MaxQueueLength', 86400 * 3)
    self.pilotLogLevel = self.am_getOption('PilotLogLevel', 'INFO')
    self.maxJobsInFillMode = self.am_getOption('MaxJobsInFillMode', self.maxJobsInFillMode)
    self.maxPilotsToSubmit = self.am_getOption('MaxPilotsToSubmit', self.maxPilotsToSubmit)
    self.pilotWaitingFlag = self.am_getOption('PilotWaitingFlag', True)
    self.pilotWaitingTime = self.am_getOption('MaxPilotWaitingTime', 7200)

    # Flags
    self.updateStatus = self.am_getOption('UpdatePilotStatus', True)
    self.getOutput = self.am_getOption('GetPilotOutput', True)
    self.sendAccounting = self.am_getOption('SendPilotAccounting', True)

    # Get the site description dictionary; "Any" (in any case) means no restriction
    siteNames = None
    if self.am_getOption('Site', 'Any').lower() != "any":
        siteNames = self.am_getOption('Site', [])
    ceTypes = None
    if self.am_getOption('CETypes', 'Any').lower() != "any":
        ceTypes = self.am_getOption('CETypes', [])
    ces = None
    if self.am_getOption('CEs', 'Any').lower() != "any":
        ces = self.am_getOption('CEs', [])

    result = Resources.getQueues(community=self.vo,
                                 siteList=siteNames,
                                 ceList=ces,
                                 ceTypeList=ceTypes,
                                 mode='Direct')
    if not result['OK']:
        return result
    resourceDict = result['Value']
    result = self.getQueues(resourceDict)
    if not result['OK']:
        return result

    for enabled, notice in ((self.updateStatus, 'Pilot status update requested'),
                            (self.getOutput, 'Pilot output retrieval requested'),
                            (self.sendAccounting, 'Pilot accounting sending requested')):
        if enabled:
            self.log.always(notice)

    for label, value in (('Sites:', siteNames),
                         ('CETypes:', ceTypes),
                         ('CEs:', ces),
                         ('PilotDN:', self.pilotDN),
                         ('PilotGroup:', self.pilotGroup),
                         ('MaxPilotsToSubmit:', self.maxPilotsToSubmit),
                         ('MaxJobsInFillMode:', self.maxJobsInFillMode)):
        self.log.always(label, value)

    self.localhost = socket.getfqdn()
    self.proxy = ''

    if self.queueDict:
        self.log.always("Agent will serve queues:")
        for queue in self.queueDict:
            queueInfo = self.queueDict[queue]
            self.log.always("Site: %s, CE: %s, Queue: %s" % (queueInfo['Site'],
                                                            queueInfo['CEName'],
                                                            queue))

    return S_OK()
def selectJob(self, resourceDescription):
    """Main job selection function to find the highest priority job
    matching the resource capacity.

    The resource description is constrained by the caller credentials, the
    pilot version/project is checked (when enabled in the Operations
    options), the pilot information is reported, and the TaskQueue DB is
    asked for a matching job, which is then marked as Matched.

    :param resourceDescription: resource description passed to
                                __processResourceDescription
    :return: S_OK( resultDict ) with 'JDL', 'JobID', 'DN', 'Group',
             'PilotInfoReportedFlag' and the job optimizer parameters,
             or S_ERROR / the failed result of a called helper
    """
    startTime = time.time()
    resourceDict = self.__processResourceDescription(resourceDescription)

    credDict = self.getRemoteCredentials()
    # Check credentials if not generic pilot
    if Properties.GENERIC_PILOT in credDict['properties']:
        # You can only match groups in the same VO
        vo = Registry.getVOForGroup(credDict['group'])
        result = Registry.getGroupsForVO(vo)
        if not result['OK']:
            # Do not go on with an unrestricted OwnerGroup: without the VO group
            # list the match would not be limited to the pilot's own VO.
            return result
        resourceDict['OwnerGroup'] = result['Value']
    else:
        # If it's a private pilot, the DN has to be the same
        if Properties.PILOT in credDict['properties']:
            gLogger.notice("Setting the resource DN to the credentials DN")
            resourceDict['OwnerDN'] = credDict['DN']
        # If it's a job sharing. The group has to be the same and just check that the DN (if any)
        # belongs to the same group
        elif Properties.JOB_SHARING in credDict['properties']:
            resourceDict['OwnerGroup'] = credDict['group']
            gLogger.notice("Setting the resource group to the credentials group")
            if 'OwnerDN' in resourceDict and resourceDict['OwnerDN'] != credDict['DN']:
                ownerDN = resourceDict['OwnerDN']
                result = Registry.getGroupsForDN(resourceDict['OwnerDN'])
                if not result['OK'] or credDict['group'] not in result['Value']:
                    # DN is not in the same group (or could not be checked): use the caller DN
                    gLogger.notice("You cannot request jobs from DN %s. It does not belong to your group!" % ownerDN)
                    resourceDict['OwnerDN'] = credDict['DN']
        # Nothing special, group and DN have to be the same
        else:
            resourceDict['OwnerDN'] = credDict['DN']
            resourceDict['OwnerGroup'] = credDict['group']

    # Check the pilot DIRAC version
    if self.__opsHelper.getValue("Pilot/CheckVersion", True):
        if 'ReleaseVersion' not in resourceDict:
            if 'DIRACVersion' not in resourceDict:
                return S_ERROR('Version check requested and not provided by Pilot')
            pilotVersion = resourceDict['DIRACVersion']
        else:
            pilotVersion = resourceDict['ReleaseVersion']

        validVersions = self.__opsHelper.getValue("Pilot/Version", [])
        if validVersions and pilotVersion not in validVersions:
            return S_ERROR('Pilot version does not match the production version %s not in ( %s )' %
                           (pilotVersion, ",".join(validVersions)))
        # Check project if requested
        validProject = self.__opsHelper.getValue("Pilot/Project", "")
        if validProject:
            if 'ReleaseProject' not in resourceDict:
                return S_ERROR("Version check requested but expected project %s not received" % validProject)
            if resourceDict['ReleaseProject'] != validProject:
                return S_ERROR("Version check requested but expected project %s != received %s" %
                               (validProject, resourceDict['ReleaseProject']))

    # Update pilot information
    pilotInfoReported = False
    pilotReference = resourceDict.get('PilotReference', '')
    if pilotReference:
        # Only report when the pilot sent the flag and it is not yet set
        if "PilotInfoReportedFlag" in resourceDict and not resourceDict['PilotInfoReportedFlag']:
            gridCE = resourceDict.get('GridCE', 'Unknown')
            site = resourceDict.get('Site', 'Unknown')
            benchmark = resourceDict.get('PilotBenchmark', 0.0)
            gLogger.verbose('Reporting pilot info for %s: gridCE=%s, site=%s, benchmark=%f' %
                            (pilotReference, gridCE, site, benchmark))
            result = gPilotAgentsDB.setPilotStatus(pilotReference, status='Running',
                                                   gridSite=site,
                                                   destination=gridCE,
                                                   benchmark=benchmark)
            if result['OK']:
                pilotInfoReported = True

    # Check the site mask
    if 'Site' not in resourceDict:
        return S_ERROR('Missing Site Name in Resource JDL')

    # Get common site mask and check the agent site
    result = gJobDB.getSiteMask(siteState='Active')
    if not result['OK']:
        return S_ERROR('Internal error: can not get site mask')
    maskList = result['Value']

    siteName = resourceDict['Site']
    if siteName not in maskList:
        # Banned site can only take Test jobs
        resourceDict['JobType'] = 'Test'

    resourceDict['Setup'] = self.serviceInfoDict['clientSetup']

    gLogger.verbose("Resource description:")
    for key in resourceDict:
        gLogger.verbose("%s : %s" % (key.rjust(20), resourceDict[key]))

    negativeCond = self.__limiter.getNegativeCondForSite(siteName)
    result = gTaskQueueDB.matchAndGetJob(resourceDict, negativeCond=negativeCond)

    if DEBUG:
        print(result)

    if not result['OK']:
        return result
    result = result['Value']
    if not result['matchFound']:
        return S_ERROR('No match found')

    jobID = result['jobId']
    resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup', 'Status'])
    if not resAtt['OK']:
        return S_ERROR('Could not retrieve job attributes')
    if not resAtt['Value']:
        return S_ERROR('No attributes returned for job')
    if not resAtt['Value']['Status'] == 'Waiting':
        # The TaskQueue is out of sync with the job status: drop the job from it
        gLogger.error('Job matched by the TQ is not in Waiting state', str(jobID))
        result = gTaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            return result
        return S_ERROR("Job %s is not in Waiting state" % str(jobID))

    # The results of the status update and of the logging record are not checked
    attNames = ['Status', 'MinorStatus', 'ApplicationStatus', 'Site']
    attValues = ['Matched', 'Assigned', 'Unknown', siteName]
    result = gJobDB.setJobAttributes(jobID, attNames, attValues)
    result = gJobLoggingDB.addLoggingRecord(jobID,
                                            status='Matched',
                                            minor='Assigned',
                                            source='Matcher')

    result = gJobDB.getJobJDL(jobID)
    if not result['OK']:
        return S_ERROR('Failed to get the job JDL')

    resultDict = {}
    resultDict['JDL'] = result['Value']
    resultDict['JobID'] = jobID

    matchTime = time.time() - startTime
    gLogger.info("Match time: [%s]" % str(matchTime))
    gMonitor.addMark("matchTime", matchTime)

    # Get some extra stuff into the response returned
    resOpt = gJobDB.getJobOptParameters(jobID)
    if resOpt['OK']:
        for key, value in resOpt['Value'].items():
            resultDict[key] = value
    resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup'])
    if not resAtt['OK']:
        return S_ERROR('Could not retrieve job attributes')
    if not resAtt['Value']:
        return S_ERROR('No attributes returned for job')

    if self.__opsHelper.getValue("JobScheduling/CheckMatchingDelay", True):
        self.__limiter.updateDelayCounters(siteName, jobID)

    # Report pilot-job association (results are not checked)
    if pilotReference:
        result = gPilotAgentsDB.setCurrentJobID(pilotReference, jobID)
        result = gPilotAgentsDB.setJobForPilot(jobID, pilotReference, updateStatus=False)

    resultDict['DN'] = resAtt['Value']['OwnerDN']
    resultDict['Group'] = resAtt['Value']['OwnerGroup']
    resultDict['PilotInfoReportedFlag'] = pilotInfoReported
    return S_OK(resultDict)
def selectJob(self, resourceDescription):
    """Main job selection function to find the highest priority job
    matching the resource capacity.

    The resource description is constrained by the caller credentials, the
    pilot version/project is checked (when enabled in the Operations
    options), the pilot information is reported, and the TaskQueue DB is
    asked for a matching job, which is then marked as Matched.

    :param resourceDescription: resource description passed to
                                __processResourceDescription
    :return: S_OK( resultDict ) with 'JDL', 'JobID', 'DN', 'Group',
             'PilotInfoReportedFlag' and the job optimizer parameters,
             or S_ERROR / the failed result of a called helper
    """
    startTime = time.time()
    resourceDict = self.__processResourceDescription(resourceDescription)

    credDict = self.getRemoteCredentials()
    # Check credentials if not generic pilot
    if Properties.GENERIC_PILOT in credDict['properties']:
        # You can only match groups in the same VO
        vo = Registry.getVOForGroup(credDict['group'])
        result = Registry.getGroupsForVO(vo)
        if not result['OK']:
            # Do not go on with an unrestricted OwnerGroup: without the VO group
            # list the match would not be limited to the pilot's own VO.
            return result
        resourceDict['OwnerGroup'] = result['Value']
    else:
        # If it's a private pilot, the DN has to be the same
        if Properties.PILOT in credDict['properties']:
            gLogger.notice("Setting the resource DN to the credentials DN")
            resourceDict['OwnerDN'] = credDict['DN']
        # If it's a job sharing. The group has to be the same and just check that the DN (if any)
        # belongs to the same group
        elif Properties.JOB_SHARING in credDict['properties']:
            resourceDict['OwnerGroup'] = credDict['group']
            gLogger.notice("Setting the resource group to the credentials group")
            if 'OwnerDN' in resourceDict and resourceDict['OwnerDN'] != credDict['DN']:
                ownerDN = resourceDict['OwnerDN']
                result = Registry.getGroupsForDN(resourceDict['OwnerDN'])
                if not result['OK'] or credDict['group'] not in result['Value']:
                    # DN is not in the same group (or could not be checked): use the caller DN
                    gLogger.notice("You cannot request jobs from DN %s. It does not belong to your group!" % ownerDN)
                    resourceDict['OwnerDN'] = credDict['DN']
        # Nothing special, group and DN have to be the same
        else:
            resourceDict['OwnerDN'] = credDict['DN']
            resourceDict['OwnerGroup'] = credDict['group']

    # Check the pilot DIRAC version
    if self.__opsHelper.getValue("Pilot/CheckVersion", True):
        if 'ReleaseVersion' not in resourceDict:
            if 'DIRACVersion' not in resourceDict:
                return S_ERROR('Version check requested and not provided by Pilot')
            pilotVersion = resourceDict['DIRACVersion']
        else:
            pilotVersion = resourceDict['ReleaseVersion']

        validVersions = self.__opsHelper.getValue("Pilot/Version", [])
        if validVersions and pilotVersion not in validVersions:
            return S_ERROR('Pilot version does not match the production version %s not in ( %s )' %
                           (pilotVersion, ",".join(validVersions)))
        # Check project if requested
        validProject = self.__opsHelper.getValue("Pilot/Project", "")
        if validProject:
            if 'ReleaseProject' not in resourceDict:
                return S_ERROR("Version check requested but expected project %s not received" % validProject)
            if resourceDict['ReleaseProject'] != validProject:
                return S_ERROR("Version check requested but expected project %s != received %s" %
                               (validProject, resourceDict['ReleaseProject']))

    # Update pilot information, unless the pilot says it was already reported
    pilotInfoReported = resourceDict.get('PilotInfoReportedFlag', False)
    pilotReference = resourceDict.get('PilotReference', '')
    if pilotReference and not pilotInfoReported:
        gridCE = resourceDict.get('GridCE', 'Unknown')
        site = resourceDict.get('Site', 'Unknown')
        benchmark = resourceDict.get('PilotBenchmark', 0.0)
        gLogger.verbose('Reporting pilot info for %s: gridCE=%s, site=%s, benchmark=%f' %
                        (pilotReference, gridCE, site, benchmark))
        result = gPilotAgentsDB.setPilotStatus(pilotReference,
                                               status='Running',
                                               gridSite=site,
                                               destination=gridCE,
                                               benchmark=benchmark)
        if result['OK']:
            pilotInfoReported = True

    # Check the site mask
    if 'Site' not in resourceDict:
        return S_ERROR('Missing Site Name in Resource JDL')

    # Get common site mask and check the agent site
    result = gJobDB.getSiteMask(siteState='Active')
    if not result['OK']:
        return S_ERROR('Internal error: can not get site mask')
    maskList = result['Value']

    siteName = resourceDict['Site']
    if siteName not in maskList:
        # Banned site can only take Test jobs
        resourceDict['JobType'] = 'Test'

    resourceDict['Setup'] = self.serviceInfoDict['clientSetup']

    gLogger.verbose("Resource description:")
    for key in resourceDict:
        gLogger.verbose("%s : %s" % (key.rjust(20), resourceDict[key]))

    negativeCond = self.__limiter.getNegativeCondForSite(siteName)
    result = gTaskQueueDB.matchAndGetJob(resourceDict, negativeCond=negativeCond)

    if DEBUG:
        print(result)

    if not result['OK']:
        return result
    result = result['Value']
    if not result['matchFound']:
        return S_ERROR('No match found')

    jobID = result['jobId']
    resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup', 'Status'])
    if not resAtt['OK']:
        return S_ERROR('Could not retrieve job attributes')
    if not resAtt['Value']:
        return S_ERROR('No attributes returned for job')
    if not resAtt['Value']['Status'] == 'Waiting':
        # The TaskQueue is out of sync with the job status: drop the job from it
        gLogger.error('Job matched by the TQ is not in Waiting state', str(jobID))
        result = gTaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            return result
        return S_ERROR("Job %s is not in Waiting state" % str(jobID))

    # The results of the status update and of the logging record are not checked
    attNames = ['Status', 'MinorStatus', 'ApplicationStatus', 'Site']
    attValues = ['Matched', 'Assigned', 'Unknown', siteName]
    result = gJobDB.setJobAttributes(jobID, attNames, attValues)
    result = gJobLoggingDB.addLoggingRecord(jobID,
                                            status='Matched',
                                            minor='Assigned',
                                            source='Matcher')

    result = gJobDB.getJobJDL(jobID)
    if not result['OK']:
        return S_ERROR('Failed to get the job JDL')

    resultDict = {}
    resultDict['JDL'] = result['Value']
    resultDict['JobID'] = jobID

    matchTime = time.time() - startTime
    gLogger.info("Match time: [%s]" % str(matchTime))
    gMonitor.addMark("matchTime", matchTime)

    # Get some extra stuff into the response returned
    resOpt = gJobDB.getJobOptParameters(jobID)
    if resOpt['OK']:
        for key, value in resOpt['Value'].items():
            resultDict[key] = value
    resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup'])
    if not resAtt['OK']:
        return S_ERROR('Could not retrieve job attributes')
    if not resAtt['Value']:
        return S_ERROR('No attributes returned for job')

    if self.__opsHelper.getValue("JobScheduling/CheckMatchingDelay", True):
        self.__limiter.updateDelayCounters(siteName, jobID)

    # Report pilot-job association (results are not checked)
    if pilotReference:
        result = gPilotAgentsDB.setCurrentJobID(pilotReference, jobID)
        result = gPilotAgentsDB.setJobForPilot(jobID, pilotReference, updateStatus=False)

    resultDict['DN'] = resAtt['Value']['OwnerDN']
    resultDict['Group'] = resAtt['Value']['OwnerGroup']
    resultDict['PilotInfoReportedFlag'] = pilotInfoReported
    return S_OK(resultDict)
def beginExecution(self):
    """Load the director options and resolve the VM types to be served.

    Reads the agent options ``VO``, ``Group``, ``MaxVMsToSubmit``,
    ``RunningPod``, ``Site`` and ``CEs``, determines the eligible user
    groups and the generic cloud credentials, and hands the VM type
    description returned by ``getVMTypes`` to ``self.getEndpoints``.

    :return: ``S_OK()`` on success; otherwise the failing result structure
             of the Registry, credentials, VM type or endpoint call is
             returned unchanged.
    """
    # The director is for a particular user community; fall back to the
    # globally configured VO when the option is not set.
    self.vo = self.am_getOption("VO", '')
    if not self.vo:
        self.vo = CSGlobals.getVO()
    # The director may be restricted to a particular user group.
    self.group = self.am_getOption("Group", '')

    # Choose the groups for which clouds will be submitted. This is a hack
    # until we are able to match clouds to VOs.
    if self.group:
        self.voGroups = [self.group]
    elif self.vo:
        result = Registry.getGroupsForVO(self.vo)
        if not result['OK']:
            return result
        self.voGroups = [group for group in result['Value']
                         if 'NormalUser' in Registry.getPropertiesForGroup(group)]
    else:
        # Neither a group nor a VO is configured: make sure the attribute
        # exists and does not keep a value from an earlier call.
        self.voGroups = []

    result = findGenericCloudCredentials(vo=self.vo)
    if not result['OK']:
        return result
    self.cloudDN, self.cloudGroup = result['Value']

    self.maxVMsToSubmit = self.am_getOption('MaxVMsToSubmit', 1)
    self.runningPod = self.am_getOption('RunningPod', self.vo)

    # Get the site description dictionary. The option value "Any" (in any
    # letter case) and an empty list both mean "no restriction" (None).
    siteNames = None
    if self.am_getOption('Site', 'Any').lower() != "any":
        siteNames = self.am_getOption('Site', []) or None
    # NOTE(review): ces is only logged below and is not passed to
    # getVMTypes - confirm whether a CE filter is intended there.
    ces = None
    if self.am_getOption('CEs', 'Any').lower() != "any":
        ces = self.am_getOption('CEs', []) or None

    result = getVMTypes(vo=self.vo, siteList=siteNames)
    if not result['OK']:
        return result
    resourceDict = result['Value']
    result = self.getEndpoints(resourceDict)
    if not result['OK']:
        return result

    self.log.always('Sites:', siteNames)
    self.log.always('CEs:', ces)
    self.log.always('CloudDN:', self.cloudDN)
    self.log.always('CloudGroup:', self.cloudGroup)

    self.localhost = socket.getfqdn()
    self.proxy = ''

    # List the served VM types only while the first-pass flag is still set.
    # self.vmTypeDict is presumably filled in by self.getEndpoints - verify.
    if self.firstPass and self.vmTypeDict:
        self.log.always("Agent will serve VM types:")
        for vmType in self.vmTypeDict:
            self.log.always(
                "Site: %s, CE: %s, VMType: %s" % (self.vmTypeDict[vmType]['Site'],
                                                  self.vmTypeDict[vmType]['CEName'],
                                                  vmType))
    self.firstPass = False
    return S_OK()
def beginExecution(self):
    """Read the agent options and set up the queues this director serves.

    Loads the community, group, pilot credentials, pilot submission
    parameters and status flags from the agent options, asks
    ``Resources.getQueues`` for the matching queue description (mode
    ``"Direct"``) and hands it to ``self.getQueues``. The resulting
    configuration is then written to the log.

    :return: ``S_OK()`` on success; otherwise the failing result structure
             of the Registry, credentials or queue call is returned
             unchanged.
    """
    readOption = self.am_getOption

    def listOrAny(optionName):
        """Return None when the option is "Any" (case-insensitive), else its list value."""
        if readOption(optionName, "Any").lower() == "any":
            return None
        return readOption(optionName, [])

    self.gridEnv = readOption("GridEnv", getGridEnv())

    # The SiteDirector is for a particular user community
    self.vo = readOption("Community", "") or CSGlobals.getVO()
    # The SiteDirector is for a particular user group
    self.group = readOption("Group", "")

    # self.voGroups contains all the eligible user groups for pilots
    # submitted by this SiteDirector. Choosing them this way is a hack until
    # we are able to match pilots to VOs.
    self.voGroups = []
    if self.group:
        self.voGroups = [self.group]
    elif self.vo:
        groupsResult = Registry.getGroupsForVO(self.vo)
        if not groupsResult["OK"]:
            return groupsResult
        self.voGroups.extend(
            candidate
            for candidate in groupsResult["Value"]
            if "NormalUser" in Registry.getPropertiesForGroup(candidate)
        )

    credentials = findGenericPilotCredentials(vo=self.vo)
    if not credentials["OK"]:
        return credentials
    self.pilotDN, self.pilotGroup = credentials["Value"]
    # Explicit agent options take precedence over the discovered credentials.
    self.pilotDN = readOption("PilotDN", self.pilotDN)
    self.pilotGroup = readOption("PilotGroup", self.pilotGroup)

    self.platforms = []
    self.sites = []
    # Submit pools come from the group when one is set, otherwise from the VO.
    self.defaultSubmitPools = ""
    if self.group:
        self.defaultSubmitPools = Registry.getGroupOption(self.group, "SubmitPools", "")
    elif self.vo:
        self.defaultSubmitPools = Registry.getVOOption(self.vo, "SubmitPools", "")

    self.pilot = readOption("PilotScript", DIRAC_PILOT)
    self.install = DIRAC_INSTALL
    self.workingDirectory = readOption("WorkDirectory")
    self.maxQueueLength = readOption("MaxQueueLength", 86400 * 3)
    self.pilotLogLevel = readOption("PilotLogLevel", "INFO")
    # The current attribute values act as defaults for the next two options.
    self.maxJobsInFillMode = readOption("MaxJobsInFillMode", self.maxJobsInFillMode)
    self.maxPilotsToSubmit = readOption("MaxPilotsToSubmit", self.maxPilotsToSubmit)
    self.pilotWaitingFlag = readOption("PilotWaitingFlag", True)
    self.pilotWaitingTime = readOption("MaxPilotWaitingTime", 7200)

    # Flags
    self.updateStatus = readOption("UpdatePilotStatus", True)
    self.getOutput = readOption("GetPilotOutput", True)
    self.sendAccounting = readOption("SendPilotAccounting", True)

    # Get the site description dictionary
    siteNames = listOrAny("Site")
    ceTypes = listOrAny("CETypes")
    ces = listOrAny("CEs")

    queuesResult = Resources.getQueues(
        community=self.vo, siteList=siteNames, ceList=ces, ceTypeList=ceTypes, mode="Direct"
    )
    if not queuesResult["OK"]:
        return queuesResult
    setupResult = self.getQueues(queuesResult["Value"])
    if not setupResult["OK"]:
        return setupResult

    # Report which optional activities are switched on.
    enabledActivities = (
        (self.updateStatus, "Pilot status update requested"),
        (self.getOutput, "Pilot output retrieval requested"),
        (self.sendAccounting, "Pilot accounting sending requested"),
    )
    for enabled, message in enabledActivities:
        if enabled:
            self.log.always(message)

    # Report the effective configuration.
    summary = (
        ("Sites:", siteNames),
        ("CETypes:", ceTypes),
        ("CEs:", ces),
        ("PilotDN:", self.pilotDN),
        ("PilotGroup:", self.pilotGroup),
        ("MaxPilotsToSubmit:", self.maxPilotsToSubmit),
        ("MaxJobsInFillMode:", self.maxJobsInFillMode),
    )
    for label, value in summary:
        self.log.always(label, value)

    self.localhost = socket.getfqdn()
    self.proxy = ""

    # self.queueDict is presumably filled in by self.getQueues - verify.
    if self.queueDict:
        self.log.always("Agent will serve queues:")
        for queueName in self.queueDict:
            description = self.queueDict[queueName]
            self.log.always(
                "Site: %s, CE: %s, Queue: %s" % (description["Site"], description["CEName"], queueName)
            )

    return S_OK()