def ProbeSoftwareArea():
    """Scan the shared software area and report what is found to the SoftwareTag service.

    The local CE name is read from ``/LocalSite/GridCE``. Every directory
    below ``$VO_GLAST_ORG_SW_DIR/glast/ground/releases`` that contains a
    ``bin`` sub-directory is translated into a tag with
    ``getMappingTagFromDirectory`` and that tag is reported as 'Valid' for
    the CE.

    Returns:
        S_OK() when every report succeeded (directories that cannot be
        mapped to a tag are logged and skipped, they do not affect the
        result). S_ERROR when the CE is not configured, when the software
        area variable is missing (the CE is then reported as "Bad"), or
        with the message of the last failed report.
    """
    from DIRAC import gLogger, gConfig

    ce = gConfig.getValue('/LocalSite/GridCE', '')
    if not ce:
        return S_ERROR("CE undefined, cannot proceed")

    from GlastDIRAC.ResourceStatusSystem.Client.SoftwareTagClient import SoftwareTagClient
    tag_client = SoftwareTagClient()

    if 'VO_GLAST_ORG_SW_DIR' not in os.environ:
        # No software area at all: flag the CE as Bad (with an empty tag).
        report = tag_client.updateCEStatus("", ce, "Bad")
        if report['OK']:
            return S_ERROR("Missing VO_GLAST_ORG_SW_DIR environment variable")
        return S_ERROR("Failed to report Bad site, missing software area.")

    sw_root = os.environ['VO_GLAST_ORG_SW_DIR']
    gLogger.notice("Found the following software directory:", sw_root)

    # A release directory is any directory that has a "bin" child.
    releases_root = os.path.join(sw_root, "glast/ground/releases")
    release_dirs = [top for top, subdirs, _files in os.walk(releases_root)
                    if "bin" in subdirs]

    last_failure = None
    for release_dir in release_dirs:
        gLogger.notice("Decoding %s and tries to make a tag out of it" % release_dir)
        # Need mapping between Tag name and local software directory name
        mapping = getMappingTagFromDirectory(release_dir)
        if not mapping['OK']:
            gLogger.error("Failed finding relation between directory and Tag")
            continue

        tag = mapping['Value']
        gLogger.notice("Found tag ", tag)
        outcome = tag_client.updateCEStatus(tag, ce, 'Valid')
        if outcome['OK']:
            gLogger.notice("Tag now Valid!")
        else:
            gLogger.error("Failed to report back: %s" % outcome['Message'])
            # Only the most recent failure message is kept for the return value.
            last_failure = outcome['Message']

    if last_failure:
        return S_ERROR(last_failure)
    return S_OK()
def ProbeSoftwareArea():
    """Inspect the shared software area and publish the tags it contains.

    Steps performed:

    1. Read the CE name from ``/LocalSite/GridCE``; give up if it is empty.
    2. If ``VO_GLAST_ORG_SW_DIR`` is not in the environment, report the CE
       as "Bad" to the SoftwareTag service and return an error.
    3. Walk ``<sw dir>/glast/ground/releases`` and keep each directory that
       has a ``bin`` sub-directory.
    4. Map each kept directory to a tag (``getMappingTagFromDirectory``)
       and report the tag as 'Valid' for the CE.

    Returns:
        S_OK() if no report failed, otherwise S_ERROR carrying the message
        of the last failed report (or a fixed message for steps 1 and 2).
        A directory that cannot be mapped to a tag is only logged.
    """
    from DIRAC import gLogger, gConfig

    ce = gConfig.getValue('/LocalSite/GridCE', '')
    if not ce:
        return S_ERROR("CE undefined, cannot proceed")

    from GlastDIRAC.ResourceStatusSystem.Client.SoftwareTagClient import SoftwareTagClient
    swtc = SoftwareTagClient()

    base_sw_dir = os.environ.get('VO_GLAST_ORG_SW_DIR')
    if base_sw_dir is None:
        bad_report = swtc.updateCEStatus("", ce, "Bad")
        if not bad_report['OK']:
            return S_ERROR("Failed to report Bad site, missing software area.")
        return S_ERROR("Missing VO_GLAST_ORG_SW_DIR environment variable")

    gLogger.notice("Found the following software directory:", base_sw_dir)

    candidates = []
    for current, children, _ in os.walk(os.path.join(base_sw_dir, "glast/ground/releases")):
        if "bin" in children:
            candidates.append(current)

    failures = []
    for candidate in candidates:
        gLogger.notice("Decoding %s and tries to make a tag out of it" % candidate)
        # Need mapping between Tag name and local software directory name
        tag_res = getMappingTagFromDirectory(candidate)
        if not tag_res['OK']:
            gLogger.error("Failed finding relation between directory and Tag")
            continue
        found_tag = tag_res['Value']
        gLogger.notice("Found tag ", found_tag)
        update_res = swtc.updateCEStatus(found_tag, ce, 'Valid')
        if not update_res['OK']:
            gLogger.error("Failed to report back: %s" % update_res['Message'])
            failures.append(update_res['Message'])
            continue
        gLogger.notice("Tag now Valid!")

    # The caller only ever sees the most recent failure message.
    if failures and failures[-1]:
        return S_ERROR(failures[-1])
    return S_OK()
else: gLogger.notice("Found the tags:", res['Value']) res = sw.updateStatus(mytag, mysite, "Probing") if not res['OK']: gLogger.error(res['Message']) else: gLogger.notice("Updated %s at %s to %s" % (mytag, mysite, "Probing")) res = getQueues(siteList = [mysite]) if not res['OK']: gLogger.error(res['Message']) dexit(0) cetest = res['Value'][mysite].keys()[0] res = sw.updateCEStatus(mytag, cetest, "Valid") if not res['OK']: gLogger.error(res['Message']) else: gLogger.notice("Updated %s to %s at %s" %(mytag, "Valid", cetest)) #try again now that at least one CE as a Valid tag res = sw.getSitesForTag(mytag) if not res['OK']: gLogger.error(res['Message']) else: gLogger.notice("Sites for tag: ", res['Value']) #Remove the association tag-site (mark as removed) res = sw.removeTagAtSite(mytag,mysite) if not res['OK']:
class SoftwareMonitorAgent(AgentModule):
    """Agent driving software tags from 'New' to 'Valid'.

    On each cycle every (tag, CE) pair whose status is 'New' is marked
    'Installing'; then either a probe job is submitted to the CE (option
    ``SubmitJobs``) or the pair is marked 'Valid' directly. Pairs returned
    by the 'Installing' query with ``olderthan=Delay`` are put back to 'New'.
    """

    def initialize(self):
        """Read the agent options and create the SoftwareTag client.

        Returns:
            S_OK() always.
        """
        self.am_setOption("PollingTime", 86400)  # Once a day is enough
        self.swtc = SoftwareTagClient()

        self.submitjobs = self.am_getOption('SubmitJobs', False)
        mode_message = ("Will submit probe jobs to validate the software tags"
                        if self.submitjobs
                        else "Will mark as Valid all 'New' tags directly.")
        self.log.info(mode_message)

        self.delay = self.am_getOption("Delay", 86400)
        self.log.info("Will reset to 'New' the tasks that have been 'Probing' for %s seconds" % self.delay)

        self.script = self.am_getOption("SoftwareManagementScript", "ProbeSoftwareArea.py")
        # Needs to be able to submit job for that VO
        self.am_setOption('shifterProxy', 'SoftwareManager')
        return S_OK()

    def execute(self):
        """Run one cycle: process 'New' tags, then reset old 'Installing' ones.

        Returns:
            The failed result of the 'New' query if that query fails,
            otherwise S_OK(). Failures on individual (tag, CE) pairs and a
            failure of the 'Installing' query are only logged.
        """
        new_tags = self.swtc.getTagsWithStatus("New")
        if not new_tags['OK']:
            return new_tags
        if not new_tags['Value']:
            self.log.info("No 'New' tags to consider")

        for tag, ces in new_tags['Value'].items():
            for ce in ces:
                marked = self.swtc.updateCEStatus(tag, ce, 'Installing')
                if not marked['OK']:
                    self.log.error(marked['Message'])
                    continue

                if self.submitjobs:
                    outcome = self.submitProbeJobs(ce)
                else:
                    outcome = self.swtc.updateCEStatus(tag, ce, 'Valid')

                if outcome['OK']:
                    self.log.info("Took care of %s at %s" % (tag, ce))
                else:
                    self.log.error(outcome['Message'])

        # Second pass: pairs that stayed 'Installing' too long go back to 'New'.
        stale = self.swtc.getTagsWithStatus("Installing", olderthan=self.delay)
        if not stale['OK']:
            self.log.error("Failed to get old 'Installing' tags")
            return S_OK()
        if not stale['Value']:
            self.log.info("No 'Installing' tags to reset")

        for tag, ces in stale['Value'].items():
            for ce in ces:
                reset = self.swtc.updateCEStatus(tag, ce, 'New')
                if not reset['OK']:
                    self.log.error(reset['Message'])
        return S_OK()

    def submitProbeJobs(self, ce):
        """Submit a probe job targeted at one CE.

        Args:
            ce: name of the CE the job is sent to.

        Returns:
            The failed submission result, or a bare S_OK() on success (the
            value returned by the submission itself is not passed on).
        """
        # need credentials, should be there since the initialize
        from DIRAC.Interfaces.API.Dirac import Dirac
        dirac_api = Dirac()
        from DIRAC.Interfaces.API.Job import Job
        from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
        import DIRAC

        # NOTE(review): the key is spelled "Sofware..." here while the agent
        # option is "SoftwareManagementScript" - confirm against the deployed
        # configuration before changing it.
        script_name = Operations().getValue("ResourceStatus/SofwareManagementScript", self.script)
        executable = "%s/GlastDIRAC/ResourceStatusSystem/Client/%s" % (DIRAC.rootPath, script_name)

        probe = Job()
        probe.setDestinationCE(ce)
        probe.setCPUTime(1000)
        probe.setName("Probe %s" % ce)
        probe.setJobGroup("SoftwareProbe")
        probe.setExecutable(executable, logFile='SoftwareProbe.log')
        probe.setOutputSandbox('*.log')

        submission = dirac_api.submit(probe)
        if submission['OK']:
            return S_OK()
        return submission
class SoftwareMonitorAgent(AgentModule):
    """Agent driving software tags from 'New' to 'Valid'.

    On each cycle every (tag, CE) pair whose status is 'New' and whose
    site(s) are all in the site mask is marked 'Installing'; then either a
    probe job is submitted to the CE (option ``SubmitJobs``) or the pair is
    marked 'Valid' directly. Pairs returned by the 'Installing' query with
    the configured ``Delay`` are put back to 'New'.
    """

    def initialize(self):
        """Read the agent options and create the SoftwareTag client.

        Returns:
            S_OK() always.
        """
        self.am_setOption("PollingTime", 86400)  # Once a day is enough
        self.swtc = SoftwareTagClient()
        self.submitjobs = self.am_getOption('SubmitJobs', False)
        if self.submitjobs:
            self.log.info("Will submit probe jobs to validate the software tags")
        else:
            self.log.info("Will mark as Valid all 'New' tags directly.")
        self.delay = self.am_getOption("Delay", 86400)
        self.log.info("Will reset to 'New' the tasks that have been 'Probing' for %s seconds" % self.delay)
        self.script = self.am_getOption("SoftwareManagementScript", "ProbeSoftwareArea.py")
        # Needs to be able to submit job for that VO
        self.am_setOption('shifterProxy', 'SoftwareManager')
        return S_OK()

    def execute(self):
        """Run one cycle: process 'New' tags, then reset old 'Installing' ones.

        Returns:
            The failed result if the site mask or the 'New' tags cannot be
            retrieved, otherwise S_OK(). Failures on individual (tag, CE)
            pairs and a failure of the 'Installing' query are only logged.
        """
        # Get the site mask; without it no CE can be checked, so stop here
        # instead of reading a missing 'Value' key.
        mask_res = DiracAdmin().getSiteMask(printOutput=False)
        if not mask_res["OK"]:
            self.log.error("error retrieving site mask: %s" % str(mask_res["Message"]))
            return mask_res
        site_mask = mask_res["Value"]

        new_res = self.swtc.getTagsWithStatus("New")
        if not new_res['OK']:
            return new_res
        if not new_res['Value']:
            self.log.info("No 'New' tags to consider")

        for tag, ces in new_res['Value'].items():
            for ce in ces:
                site_res = getSiteForCEs([ce])
                if not site_res["OK"]:
                    # The site is unknown, so the mask cannot be checked:
                    # leave this CE untouched for this cycle.
                    self.log.error("could not retrieve Site name for CE %s" % ce)
                    continue

                disabled_sites = [site for site in site_res["Value"]
                                  if site not in site_mask]
                if disabled_sites:
                    for site in disabled_sites:
                        self.log.info("CE/Site disabled %s" % site)
                    continue  # ignore this CE

                res = self.swtc.updateCEStatus(tag, ce, 'Installing')
                if not res['OK']:
                    self.log.error(res['Message'])
                    continue

                if self.submitjobs:
                    res = self.submitProbeJobs(ce)
                else:
                    res = self.swtc.updateCEStatus(tag, ce, 'Valid')
                if not res['OK']:
                    self.log.error(res['Message'])
                else:
                    self.log.info("Done with %s at %s" % (tag, ce))

        # Also, reset to 'New' the tags that were 'Installing' for too long.
        old_res = self.swtc.getTagsWithStatus("Installing", self.delay)
        if not old_res['OK']:
            self.log.error("Failed to get old 'Installing' tags")
            return S_OK()
        if not old_res['Value']:
            self.log.info("No 'Installing' tags to reset")
        for tag, ces in old_res['Value'].items():
            for ce in ces:
                res = self.swtc.updateCEStatus(tag, ce, 'New')
                if not res['OK']:
                    self.log.error(res['Message'])
        return S_OK()

    def submitProbeJobs(self, ce):
        """Submit a probe job targeted at one CE.

        Args:
            ce: name of the CE the job is sent to.

        Returns:
            S_ERROR if the ``DIRAC`` environment variable (used to locate
            the probe script) is not set, the failed submission result if
            the submission fails, or a bare S_OK() on success (the value
            returned by the submission itself is not passed on).
        """
        # need credentials, should be there since the initialize
        from DIRAC.Interfaces.API.Dirac import Dirac
        d = Dirac()
        from DIRAC.Interfaces.API.Job import Job
        from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
        import os

        if 'DIRAC' not in os.environ:
            # Report the problem through the usual result structure so the
            # calling cycle can log it and carry on with the other CEs.
            from DIRAC import S_ERROR
            return S_ERROR("DIRAC environment variable is not set, cannot locate the probe script")

        ops = Operations("glast.org")
        # NOTE(review): the key is spelled "Sofware..." here while the agent
        # option is "SoftwareManagementScript" - confirm against the deployed
        # configuration before changing it.
        scriptname = ops.getValue("ResourceStatus/SofwareManagementScript", self.script)

        j = Job()
        j.setDestinationCE(ce)
        j.setCPUTime(1000)
        j.setName("Probe %s" % ce)
        j.setJobGroup("SoftwareProbe")
        j.setExecutable("%s/GlastDIRAC/ResourceStatusSystem/Client/%s" % (os.environ['DIRAC'], scriptname),
                        logFile='SoftwareProbe.log')
        j.setOutputSandbox('*.log')

        res = d.submit(j)
        if not res['OK']:
            return res
        return S_OK()
else: gLogger.notice("Found the tags:", res['Value']) res = sw.updateStatus(mytag, mysite, "Probing") if not res['OK']: gLogger.error(res['Message']) else: gLogger.notice("Updated %s at %s to %s" % (mytag, mysite, "Probing")) res = getQueues(siteList=[mysite]) if not res['OK']: gLogger.error(res['Message']) dexit(0) cetest = res['Value'][mysite].keys()[0] res = sw.updateCEStatus(mytag, cetest, "Valid") if not res['OK']: gLogger.error(res['Message']) else: gLogger.notice("Updated %s to %s at %s" % (mytag, "Valid", cetest)) #try again now that at least one CE as a Valid tag res = sw.getSitesForTag(mytag) if not res['OK']: gLogger.error(res['Message']) else: gLogger.notice("Sites for tag: ", res['Value']) #Remove the association tag-site (mark as removed) res = sw.removeTagAtSite(mytag, mysite) if not res['OK']: