def _run(self):
    """Gather LSF host information by joining lshosts and bhosts output.

    Returns the grouped list of hosts that pass _goodHost filtering.
    Raises StepError if either LSF command fails.
    """
    lshosts = self.params.get("lshosts", "lshosts")
    cmd = lshosts + " -w"
    self.debug("running " + cmd)  # bug fix: was info.debug - 'info' is not defined here
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("lshosts failed: " + output)
    lshostsRecords = {}
    lines = output.split("\n")
    # skip the header line
    for index in range(1, len(lines)):
        rec = LsHostsRecord(lines[index])
        lshostsRecords[rec.hostName] = rec

    bhosts = self.params.get("bhosts", "bhosts")
    cmd = bhosts + " -w"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("bhosts failed: " + output)
    bhostsRecords = {}
    lines = output.split("\n")
    for index in range(1, len(lines)):
        rec = BHostsRecord(lines[index])
        bhostsRecords[rec.hostName] = rec

    all_hosts = []
    for host in list(lshostsRecords.keys()):
        lshost = lshostsRecords.get(host)
        bhost = bhostsRecords.get(host)
        if bhost is None:
            # bug fix: was 'break', which silently dropped every remaining
            # host; skip only the host with no bhost record
            self.warning("no bhost record found for " + host)
            continue
        all_hosts.append(self._getHost(lshost, bhost))
    hosts = [host for host in all_hosts if self._goodHost(host)]
    return self._groupHosts(hosts)
def run(self):
    """Validate publish parameters, then delegate to PublishStep.run.

    Required parameters: host, path. Optional: port (default 80),
    method (default "PUT"). Raises StepError when a required parameter
    is missing.
    """
    try:
        self.host = self.params["host"]
    except KeyError:
        raise StepError("host not specified")
    self.port = self.params.get("port", 80)
    self.method = self.params.get("method", "PUT")
    try:
        self.path = self.params["path"]
    except KeyError:
        # bug fix: dict access raises KeyError, not ConfigParser.Error,
        # so "path not specified" was never produced and the raw KeyError
        # escaped instead
        raise StepError("path not specified")
    PublishStep.run(self)
def _getDuration(dstr): m = re.search("(\d+)-(\d+):(\d+):(\d+)", dstr) if m is not None: return int( m.group(4)) + 60 * (int(m.group(3)) + 60 * (int(m.group(2)) + 24 * int(m.group(1)))) m = re.search("(\d+):(\d+):(\d+)", dstr) if m is not None: return int(m.group(3)) + 60 * (int(m.group(2)) + 60 * int(m.group(1))) raise StepError("failed to parse duration: %s" % dstr)
def addInfo(self, job):
    """Run qstat for one job and merge its detail lines into the job."""
    qstat = self.params.get("qstat", "qstat")
    cmd = qstat + " -xml -s prsz -j " + job.LocalIDFromManager
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("qstat failed: " + output + "\n")
    parseJLines(output, {job.LocalIDFromManager: job}, self)
def run(self):
    """Output the site name, asking tgwhereami when it isn't configured."""
    if "site_name" in self.params:
        site_name = self.params["site_name"]
    else:
        tg_whereami = self.params.get("tgwhereami", "tgwhereami")
        status, output = subprocess.getstatusoutput(tg_whereami + " -s")
        if status != 0:
            raise StepError("failed to execute %s: %s" % (tg_whereami, output))
        site_name = output
    self._output(ipf.sysinfo.SiteName(site_name))
def _run(self):
    """Tail the PBS server log directory, feeding entries to _logEntry.

    The directory comes from the server_logs_dir parameter or, failing
    that, well-known locations under $PBS_HOME.
    """
    if "server_logs_dir" in self.params:
        dir_name = self.params["server_logs_dir"]
    else:
        if "PBS_HOME" not in os.environ:
            raise StepError(
                "server_logs_dir not specified and the PBS_HOME environment variable is not set"
            )
        pbs_home = os.environ["PBS_HOME"]
        # newer layouts put server_logs under spool/; older ones don't
        dir_name = os.path.join(pbs_home, "spool", "server_logs")
        if not os.path.exists(dir_name):
            dir_name = os.path.join(pbs_home, "server_logs")
        if not os.path.exists(dir_name):
            raise StepError(
                "could not find server_logs dir starting from the directory PBS_HOME"
            )
    watcher = LogDirectoryWatcher(self._logEntry, dir_name, self.position_file)
    watcher.run()
def _trigger(self, representation):
    """Run the configured workflow in response to a triggering representation."""
    if "workflow" not in self.params:
        raise StepError("required parameter 'workflow' not specified")
    workflow_file = self.params["workflow"]
    self.info("running workflow %s", workflow_file)
    # imported here rather than at module scope to avoid a circular import
    from ipf.engine import WorkflowEngine
    engine = WorkflowEngine()
    engine.run(workflow_file)
def _run(self):
    # Annotate resource-manager jobs with scheduler priorities parsed from
    # the output of an external 'query_priority' command, then return the
    # jobs ordered primarily by state and secondarily by priority.
    query_priority = self.params.get("query_priority", "query_priority")
    jobs = self._getInput(
        computing_activity.ComputingActivities).activities
    # index jobs by their local resource-manager id for quick lookup
    job_map = {}
    for job in jobs:
        job_map[job.LocalIDFromManager] = job
    self.debug("running " + query_priority)
    status, output = subprocess.getstatusoutput(query_priority)
    if status != 0:
        raise StepError("'%s' failed: %s\n" % (query_priority, output))
    # The command output is sectioned: lines after "IDLE:" describe pending
    # jobs, lines after "NON-QUEUED:" describe held jobs. Lines seen before
    # either header are ignored.
    state = None
    for line in output.splitlines():
        if "IDLE:" in line:
            state = "pending"
            continue
        if "NON-QUEUED:" in line:
            state = "held"
            continue
        if state is None:
            # haven't reached a recognized section yet
            continue
        toks = line.split()
        id = toks[0].split(".")[0]  # remove submit host, if one is included
        if toks[1].endswith("*"):
            # a job had this priority: 100000000000000000000000000010*
            # and was the highest priority job, so:
            priority = sys.maxsize
        else:
            priority = int(toks[1])
        try:
            # torque qstat shows a single JOB_ID[] for a job array, but catalina has multiple JOB_ID[##]
            m = re.search("(\S+)\[\d+\]", id)
            if m is not None:
                id = m.group(1) + "[]"
            job_map[id].Extension["Priority"] = priority
            if state == "held":
                # catalina knows the job is held even when the resource
                # manager still reports it as pending
                if job_map[id].State[0] == computing_activity.ComputingActivity.STATE_PENDING:
                    job_map[id].State[0] = computing_activity.ComputingActivity.STATE_HELD
        except KeyError:
            self.warning("didn't find job %s in resource manager jobs", id)
    # two stable sorts: sort by priority first, then by state, so jobs are
    # grouped by state and ordered by priority within each state
    jobs = sorted(jobs, key=self._jobPriority)
    jobs = sorted(jobs, key=self._jobStateKey)
    return jobs
def _connectIfNecessary(self):
    """Ensure a messaging channel is open, trying each configured service once."""
    if self.channel is not None:
        return
    # one attempt per configured service; _selectService picks the next one
    for _ in self.services:
        service = self._selectService()
        try:
            self._connect(service)
            return
        except Exception as err:
            self.warning("failed to connect to service %s: %s", service, err)
    raise StepError(
        "could not connect to any of the specified messaging services")
def run(self):
    """Output the site name, asking xdresourceid when it isn't configured."""
    if "site_name" in self.params:
        site_name = self.params["site_name"]
    else:
        xdresourceid = self.params.get("xdresourceid", "xdresourceid")
        status, output = subprocess.getstatusoutput(xdresourceid + " -s")
        if status != 0:
            raise StepError("failed to execute %s: %s" % (xdresourceid, output))
        site_name = output
    self._output(ipf.sysinfo.SiteName(site_name))
def run(self):
    """Output the site name, deriving it from the host's FQDN when unset."""
    if "site_name" in self.params:
        site_name = self.params["site_name"]
    else:
        host_name = socket.getfqdn()
        # assumes that the site name is all except the first component
        dot = host_name.find(".")
        if dot < 0:
            raise StepError("host name does not appear to be fully qualified")
        site_name = host_name[dot + 1:]
    self._output(SiteName(site_name))
def _setParameters(self, workflow_params, step_params):
    """Set step parameters and resolve the 'trigger' representation names.

    Each name listed under the 'trigger' parameter is looked up in the
    catalog; its representation class is recorded as a trigger and its
    data class added to this step's requirements. Raises StepError for
    an unknown representation name.
    """
    Step._setParameters(self, workflow_params, step_params)
    trigger_names = self.params.get("trigger", [])
    from ipf.catalog import catalog  # can't import this at the top - circular import
    for name in trigger_names:
        try:
            rep_class = catalog.representations[name]
        except KeyError:
            raise StepError("unknown representation %s" % name)
        self.trigger.append(rep_class)
        # idiom fix: 'x not in y' instead of 'not x in y'
        if rep_class.data_cls not in self.requires:
            self.requires.append(rep_class.data_cls)
def _getAuthentication(step): try: username = step.params["username"] except KeyError: try: username = os.environ["OS_USERNAME"] except KeyError: raise StepError( "username parameter not provided and OS_USERNAME not set in the environment" ) try: password = step.params["password"] except KeyError: try: password = os.environ["OS_PASSWORD"] except KeyError: raise StepError( "password parameter not provided and OS_PASSWORD not set in the environment" ) try: tenant = step.params["tenant"] except KeyError: try: tenant = os.environ["OS_TENANT_NAME"] except KeyError: raise StepError( "tenant parameter not provided and OS_TENANT_NAME not set in the environment" ) try: auth_url = step.params["auth_url"] except KeyError: try: auth_url = os.environ["OS_AUTH_URL"] except KeyError: raise StepError( "auth_url parameter not provided and OS_AUTH_URL not set in the environment" ) return (username, password, tenant, auth_url)
def _run(self):
    """Collect SGE jobs: 'qstat -u' gives schedule order, 'qstat -j' details."""
    qstat = self.params.get("qstat", "qstat")
    # the output of -u is in schedule order
    cmd = qstat + " -xml -pri -s prsz -u \\*"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        self.error("qstat failed: " + output + "\n")
        raise StepError("qstat failed: " + output + "\n")
    uhandler = JobsUHandler(self)
    xml.sax.parseString(output, uhandler)
    jobs = {job.LocalIDFromManager: job for job in uhandler.jobs}

    cmd = qstat + " -xml -s prsz -j \\*"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        self.error("qstat failed: " + output + "\n")
        raise StepError("qstat failed: " + output + "\n")
    # dom parsing was slow; sax parsing failed sometimes
    parseJLines(output, jobs, self)

    return [job for job in uhandler.jobs if self._includeQueue(job.Queue)]
def _run(self):
    """Return PBS jobs filtered and ordered by an external job list file.

    The file's first line is a header; the first token of each following
    line is a job id. Jobs are returned in file order; mismatches in
    either direction are logged as warnings. Raises StepError when the
    file is not specified or cannot be read.
    """
    self.info("running")
    jobs = pbs.ComputingActivitiesStep._run(self)
    try:
        job_list_file = self.params["job_list_file"]
    except KeyError:
        raise StepError("job_list_file not specified")
    try:
        # bug fix: 'with' guarantees the file is closed even if readlines
        # raises; the original leaked the handle in that case
        with open(job_list_file, "r") as f:
            lines = f.readlines()
    except IOError as e:
        raise StepError("couldn't read job list from file " + job_list_file) from e
    job_ids = []
    for line in lines[1:]:
        toks = line.split()
        job_ids.append(toks[0])
    job_dict = {}
    for job in jobs:
        job_dict[job.LocalIDFromManager] = job
    jobs = []
    for job_id in job_ids:
        try:
            jobs.append(job_dict[job_id])
            del job_dict[job_id]
        except KeyError:
            self.warning("didn't find job " + job_id + " in job list")
    for job_id in list(job_dict.keys()):
        self.warning("didn't find an entry in job list for PBS job " + job_id)
    return jobs
def _fromNimbusAdmin(self):
    """List current VMs via nimbus-admin and convert each to an activity."""
    if "nimbus_dir" in self.params:
        nimbus_admin = os.path.join(self.params["nimbus_dir"], "bin",
                                    "nimbus-admin")
    else:
        nimbus_admin = "nimbus-admin"
    cmd = nimbus_admin + " -l"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("nimbus-admin failed: " + output + "\n")
    # VM records are separated by blank lines
    return [self._activityFromAdmin(vm) for vm in output.split("\n\n")]
def _run(self):
    """Return grouped nodes parsed from 'nimbus-nodes -l' output."""
    if "nimbus_dir" in self.params:
        nimbus_nodes = os.path.join(self.params["nimbus_dir"], "bin",
                                    "nimbus-nodes")
    else:
        nimbus_nodes = "nimbus-nodes"
    cmd = nimbus_nodes + " -l"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("nimbus-nodes failed: " + output + "\n")
    # node records are separated by blank lines
    nodes = [self._getNode(text) for text in output.split("\n\n")]
    return self._groupHosts(nodes)
def _run(self):
    """Collect Nimbus activities, preferring nimbus-admin with a fallback."""
    if "nimbus_dir" not in self.params:
        raise StepError("nimbus_dir parameter not specified")
    self.nimbus_dir = self.params["nimbus_dir"]
    try:
        return self._fromNimbusAdmin()
    except StepError as e:
        # probably an older Nimbus version without the nimbus-admin command
        # don't bother to pull node assignments out of services.log
        self.info(
            "getting activities from current-reservations.txt instead of nimbus-admin: %s",
            str(e))
        return self._fromCurrentReservations()
def _run(self):
    """Return grouped execution hosts parsed from 'pbsnodes -a' output."""
    pbsnodes = self.params.get("pbsnodes", "pbsnodes")
    cmd = pbsnodes + " -a"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        self.error("pbsnodes failed: " + output)
        raise StepError("pbsnodes failed: " + output + "\n")
    hosts = []
    # node records are separated by blank lines
    for node_text in output.split("\n\n"):
        host = self._getHost(node_text)
        if self._testProperties(host) and self._goodHost(host):
            hosts.append(host)
    return self._groupHosts(hosts)
def _run(self):
    """Watch the Nimbus services log, seeded with the current activities."""
    self.info("running")
    # use ComputingActivitiesStep to initialize the cache of activities
    step = ComputingActivitiesStep()
    step.setParameters({}, self.params)
    for activity in step._run():
        self.activities[activity.LocalIDFromManager] = activity
    if "nimbus_dir" not in self.params:
        raise StepError("nimbus_dir parameter not specified")
    log_file = os.path.join(self.params["nimbus_dir"], "var", "services.log")
    watcher = LogFileWatcher(self._logEntry, log_file, self.position_file)
    watcher.run()
def _run(self):
    """Build a ComputingService from the modules found on SERVICEPATH.

    Walks every directory on the SERVICEPATH environment variable and
    registers each module file via self._addModule. Raises StepError
    when SERVICEPATH is not set in the environment.
    """
    service = computing_service.ComputingService()
    try:
        paths = os.environ["SERVICEPATH"]
    except KeyError:
        raise StepError("didn't find environment variable SERVICEPATH")
    module_paths = paths.split(":")
    for path in module_paths:
        try:
            packages = os.listdir(path)
        except OSError:
            # unreadable/missing directory on the path - skip it
            continue
        for name in packages:
            self.debug("name of package is " + name)  # was a stray print()
            if name.startswith("."):
                continue
            if not os.path.isdir(os.path.join(path, name)):
                # assume these are modules that just import other modules
                continue
            for file_name in os.listdir(os.path.join(path, name)):
                if file_name.startswith("."):
                    continue
                if file_name.endswith("~"):
                    continue
                if file_name.endswith(".lua"):
                    # bug fix: this branch passed the undefined name 'apps'
                    # (NameError at runtime); pass the service being built,
                    # as the non-lua branch does
                    self._addModule(os.path.join(path, name, file_name),
                                    name, file_name[:-4], service)
                else:
                    self.info("calling addmodule w/ version")
                    self._addModule(os.path.join(path, name, file_name),
                                    name, file_name, service)
    return service
def _run(self):
    """Return queues parsed from 'qstat -Q -f -M', filtered by _includeQueue."""
    qstat = self.params.get("qstat", "qstat")
    cmd = qstat + " -Q -f -M"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        self.error("qstat failed: " + output)
        raise StepError("qstat failed: " + output + "\n")
    queues = []
    # queue records are separated by blank lines
    for queue_text in output.split("\n\n"):
        queue = self._getQueue(queue_text)
        if self._includeQueue(queue.Name):
            queues.append(queue)
    return queues
def _run(self):
    """Return LSF jobs parsed from 'bjobs -a -l -u all' output.

    Jobs whose queue is excluded by _includeQueue are skipped.
    Raises StepError if bjobs fails.
    """
    bjobs = self.params.get("bjobs", "bjobs")
    cmd = bjobs + " -a -l -u all"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("bjobs failed: " + output + "\n")
    jobStrings = output.split(
        "------------------------------------------------------------------------------"
    )
    # bug fix: the original returned the undefined name 'jobList' (NameError)
    # and called an undefined bare 'includeQueue'; build and return the list,
    # filtering with self._includeQueue as the sibling steps do
    jobList = []
    for jobString in jobStrings:
        job = self._getJob(jobString)
        if self._includeQueue(job.Queue):
            jobList.append(job)
    return jobList
def _getDateTime(self, dt_str):
    """Parse 'MM/DD/YYYY HH:MM:SS' into a local-timezone-aware datetime.

    Raises StepError when dt_str does not match the expected format.
    """
    # Example: 06/10/2012 16:17:41
    # raw string avoids invalid-escape-sequence warnings for \d
    m = re.search(r"(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)", dt_str)
    if m is None:
        raise StepError("can't parse '%s' as a date/time" % dt_str)
    month, day, year, hour, minute, second = map(int, m.groups())
    return datetime.datetime(year=year,
                             month=month,
                             day=day,
                             hour=hour,
                             minute=minute,
                             second=second,
                             tzinfo=localtzoffset())
def _run(self):
    """Return LSF queues parsed from 'bqueues -l' output.

    Queues excluded by _includeQueue are skipped. Raises StepError if
    bqueues fails.
    """
    bqueues = self.params.get("bqueues", "bqueues")
    cmd = bqueues + " -l"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("bqueues failed: " + output + "\n")
    queues = []
    queueStrings = output.split(
        "------------------------------------------------------------------------------"
    )
    for queueString in queueStrings:
        queue = self._getQueue(queueString)
        # bug fix: 'includeQueue(self.config, ...)' referenced an undefined
        # bare function; use the step's _includeQueue like the other queue
        # steps in this codebase
        if self._includeQueue(queue.Name):
            queues.append(queue)
    return queues
def _run(self):
    """Watch the SGE reporting file, locating it via SGE_ROOT when unset."""
    self.info("running")
    # if a site is generating a schedd_runlog, can use it to find jobs that
    # are held because of dependencies
    if "reporting_file" in self.params:
        reporting_file = self.params["reporting_file"]
    elif "SGE_ROOT" in os.environ:
        reporting_file = os.path.join(os.environ["SGE_ROOT"], "default",
                                      "common", "reporting")
    else:
        msg = "no reporting_file specified and the SGE_ROOT environment variable is not set"
        self.error(msg)
        raise StepError(msg)
    watcher = LogFileWatcher(self._logEntry, reporting_file,
                             self.position_file)
    watcher.run()
def _run(self):
    """Return grouped hosts parsed from 'condor_status -long' output.

    Hosts rejected by _goodHost are skipped. Raises StepError if
    condor_status fails.
    """
    condor_status = self.params.get("condor_status", "condor_status")
    cmd = condor_status + " -long"
    self.debug("running " + cmd)  # bug fix: was info.debug - 'info' is not defined here
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        raise StepError("condor_status failed: " + output + "\n")
    # node records are separated by blank lines
    node_strings = output.split("\n\n")
    hosts = []
    for node_string in node_strings:
        host = self._getHost(node_string)
        if self._goodHost(host):
            hosts.append(host)
    return self._groupHosts(hosts)
def _run(self):
    """Return SGE queues parsed from 'qconf -sq' output."""
    qconf = self.params.get("qconf", "qconf")
    # '\\**' produces the same command text as the original '\**' literal
    # (backslash, star, star) without the invalid-escape warning
    cmd = qconf + " -sq \\**"
    self.debug("running " + cmd)
    status, output = subprocess.getstatusoutput(cmd)
    if status != 0:
        self.error("qconf failed: " + output + "\n")
        raise StepError("qconf failed: " + output + "\n")
    queues = []
    # queue records are separated by blank lines
    for queue_text in output.split("\n\n"):
        queue = self._getQueue(queue_text)
        if self._includeQueue(queue.Name):
            queues.append(queue)
    return queues
def run(self):
    """Dispatch to periodic or trigger-driven execution based on parameters."""
    if "minimum_interval" in self.params:
        self.minimum_interval = self.params["minimum_interval"]
        self.last_trigger = time.time()
    if "maximum_interval" in self.params:
        self.maximum_interval = self.params["maximum_interval"]
        self.next_trigger = time.time() + self.maximum_interval
    if not self.trigger and self.maximum_interval is None:
        raise StepError("You must specify at least one trigger or a maximum_interval")
    if not self.trigger:
        self._runPeriodic()
    else:
        self._runTrigger()
def _run(self):
    """Collect applications by scanning each directory on MODULEPATH."""
    if "exclude" in self.params:
        self.exclude = self.params["exclude"].split(",")
    else:
        self.exclude = []
    apps = application.Applications(self.resource_name)
    if "MODULEPATH" not in os.environ:
        raise StepError("didn't find environment variable MODULEPATH")
    # resolve symlinks so duplicate path entries compare equal
    module_paths = [os.path.realpath(p)
                    for p in os.environ["MODULEPATH"].split(":")]
    for path in module_paths:
        self._addPath(path, path, module_paths, apps)
    return apps