def testScheddQueryPoll(self):
    self.launch_daemons(["SCHEDD", "COLLECTOR", "STARTD", "NEGOTIATOR"])
    output_file = os.path.join(testdir, "test.out")
    if os.path.exists(output_file):
        os.unlink(output_file)
    schedd = htcondor.Schedd()
    ad = classad.parseOne(open("tests/submit.ad"))
    ads = []
    cluster = schedd.submit(ad, 10, False, ads)
    for i in range(60):
        ads_iter = schedd.xquery("ClusterId == %d" % cluster, ["JobStatus"], name="query1")
        ads_iter2 = schedd.xquery("ClusterId == %d" % cluster, ["JobStatus"], name="query2")
        ads = []
        for query in htcondor.poll([ads_iter, ads_iter2]):
            self.assertTrue(query.tag() in ["query1", "query2"])
            ads += query.nextAdsNonBlocking()
        # print(ads)
        if len(ads) == 0:
            break
        if i % 2 == 0:
            schedd.reschedule()
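For context, `htcondor.poll()` multiplexes several `QueryIterator` objects returned by `Schedd.xquery()`, yielding whichever iterator currently has results; `tag()` identifies the source query (it defaults to the schedd's name unless `name=` is passed) and `nextAdsNonBlocking()` drains the ads that are ready without blocking. A minimal standalone sketch of the same pattern, assuming only a reachable local schedd with some jobs queued:

import htcondor

schedd = htcondor.Schedd()

# Two concurrent non-blocking queries; name= sets each iterator's tag.
idle = schedd.xquery("JobStatus == 1", ["ClusterId", "ProcId"], name="idle")
running = schedd.xquery("JobStatus == 2", ["ClusterId", "ProcId"], name="running")

# poll() yields whichever iterator has ads ready, until both are exhausted.
for query in htcondor.poll([idle, running]):
    for ad in query.nextAdsNonBlocking():
        print("%s: %d.%d" % (query.tag(), ad["ClusterId"], ad["ProcId"]))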
def q(self, constraint=True):
    # type: (Union[bool, str]) -> defaultdict
    """Return list of running and idle condor jobs (CLI condor_q)."""
    condor_q = defaultdict(list)
    queries = [
        schedd.xquery(
            requirements="%s && %s" % (self.__q_requirement_string, constraint),
            projection=["ClusterId", "ProcId", "RequestCpus", "JobStatus"])
        for schedd in self.schedds
    ]
    for query in htcondor.poll(queries):
        for ads in query.nextAdsNonBlocking():
            key = "%s.%s" % (ads.get("ClusterId"), ads.get("ProcId"))
            condor_q[key].append(int(ads.get("RequestCpus")))
            condor_q[key].append(int(ads.get("JobStatus")))
    return condor_q
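The returned defaultdict flattens each matching ad into a "ClusterId.ProcId" key holding a [RequestCpus, JobStatus] pair. A hypothetical call (the `monitor` instance and its configured schedds are assumptions, not part of the original):

# Hypothetical usage: restrict the query to running jobs (JobStatus == 2).
jobs = monitor.q(constraint="JobStatus == 2")
for key, (request_cpus, job_status) in jobs.items():
    print("%s: %d cores, status %d" % (key, request_cpus, job_status))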
def extractData(self):
    # Initialize the data for the main table
    data = {
        'running': 0,
        'idle': 0,
        'cores': 0,
        'ram': 0,
        'requested_memory': 0,
        'efficiency': 0,
        'qtime': [],  # list of seconds; converted to a string at the end
        'remote': 0,
        'filename_plot': ''
    }
    # Retrieve user priority information
    for ad in self.negotiator.getPriorities():
        name = ad.get("Name").replace(ad.get("AccountingGroup") + ".", "")
        last_prio = self.priorities.setdefault(name, 500.0)
        self.priorities[name] = max(ad.get("Priority"), last_prio)
    # Extract job information using the htcondor Python bindings
    for query in htcondor.poll(self.queries):
        for ads in query:
            job_id = ads.get("GlobalJobId")
            self.condor_jobs_information[job_id] = {
                quantity: ads.get(quantity) for quantity in self.quantities_list
            }
    # Fill the main table and the per-user statistics
    for jobid, job in self.condor_jobs_information.iteritems():
        # Count remotable jobs
        if job["RemoteJob"]:
            data["remote"] += 1
        # Determine user and set up user-dependent statistics
        user = job["User"]
        if user not in self.user_statistics:
            self.user_statistics[user] = copy.deepcopy(self.user_statistics_dict)
        # Count used RAM in MiB
        if job["ResidentSetSize"] is not None:
            self.user_statistics[user]["ram"] += job["ResidentSetSize"] / 1024.
            data["ram"] += job["ResidentSetSize"] / 1024.
        # Count requested RAM in MiB
        self.user_statistics[user]["requested_memory"] += job["RequestMemory"]
        data["requested_memory"] += job["RequestMemory"]
        # Count used cores
        self.user_statistics[user]["cores"] += job["RequestCpus"]
        data["cores"] += job["RequestCpus"]
        # Get information on network traffic.
        if job["NetworkInputMb"] is not None:
            self.user_statistics[user]["NetworkInputMb"] += job["NetworkInputMb"]
        if job["NetworkOutputMb"] is not None:
            self.user_statistics[user]["NetworkOutputMb"] += job["NetworkOutputMb"]
        # Get information on input files.
        self.user_statistics[user]["transfer"] += job["TransferInputSizeMB"]
        # Summarize the status information
        status = self.jobs_status_dict.get(job["JobStatus"])
        self.user_statistics[user][status] += 1
        if status in data:
            data[status] += 1
        # Calculate the time in the queue (in seconds) for running jobs
        if status == "running":
            try:
                data["qtime"].append(max(0, job["JobStartDate"] - job["QDate"]))
            except Exception:
                pass
        # Determine the sites the user's jobs are running on
        if job["MachineAttrCloudSite0"] is None:
            job["MachineAttrCloudSite0"] = "Undefined"
        if (job["MachineAttrCloudSite0"].lower() not in self.user_statistics[user]["sites"]
                and status == "running"):
            self.user_statistics[user]["sites"].append(job["MachineAttrCloudSite0"].lower())
        # Calculate runtime, CPU time, and efficiency of each running job
        if status == "running":
            try:
                cputime = job["RemoteUserCpu"] + job["RemoteSysCpu"]
                runtime = job["RequestCpus"] * (job["ServerTime"] - job["JobStartDate"])
                efficiency = float(cputime) / float(runtime)
                # Skip stale JobCurrentStartDate values that would give efficiencies above 1
                if efficiency <= 1.:
                    self.user_statistics[user]["efficiencies"].append(efficiency)
            except Exception:
                pass
    all_efficiencies = []
    for user in self.user_statistics:
        user_data = {"batchsystem_user": user}
        for status in self.jobs_status_dict.itervalues():
            user_data[status] = self.user_statistics[user][status]
        user_data["cores"] = self.user_statistics[user]["cores"]
        user_data["ram"] = self.determine_diskspace(
            self.user_statistics[user]["ram"], given_unit="MiB")
        user_data["requested_memory"] = max(
            1, self.determine_diskspace(
                self.user_statistics[user]["requested_memory"], given_unit="MiB"))
        user_data["efficiency"] = (
            round(np.mean(self.user_statistics[user]["efficiencies"]), 2)
            if len(self.user_statistics[user]["efficiencies"]) > 0 else 1.0)
        all_efficiencies += self.user_statistics[user]["efficiencies"]
        user_data["sites"] = ",\n".join(self.user_statistics[user]["sites"])
        user_data["priority"] = round(self.priorities[user], 1)
        user_data["NetworkInputMb"] = round(self.user_statistics[user]["NetworkInputMb"], 2)
        user_data["NetworkOutputMb"] = round(self.user_statistics[user]["NetworkOutputMb"], 2)
        user_data["transfer"] = self.user_statistics[user]["NetworkOutputMb"]
        self.statistics_db_value_list.append(user_data)
    data["efficiency"] = round(np.mean(all_efficiencies), 2) if len(all_efficiencies) > 0 else 1.0
    data["ram"] = self.determine_diskspace(data["ram"], given_unit="MiB")
    data["requested_memory"] = max(
        1, self.determine_diskspace(data["requested_memory"], given_unit="MiB"))
    if len(data["qtime"]) != 0:
        data["qtime"] = str(timedelta(seconds=int(np.mean(data["qtime"]))))
    else:
        data["qtime"] = str(0)
    # Plot creation for user statistics
    data["filename_plot"] = self.plot()
    # Overall status calculation
    efficiency_status = 0.0
    if data["efficiency"] >= float(self.config["efficiency_warning"]):
        efficiency_status = 1.0
    elif data["efficiency"] >= float(self.config["efficiency_critical"]):
        efficiency_status = 0.5
    ram_status = (0.5 if float(data["ram"]) / float(data["cores"]) >
                  float(self.config["ram_requested_memory_ratio"]) else 1.0)
    queue_time_status = 0.0 if "day" in data["qtime"] else 1.0
    data["status"] = min(efficiency_status, ram_status, queue_time_status)
    return data
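The per-job efficiency computed above is CPU time divided by the wall-clock core reservation, efficiency = (RemoteUserCpu + RemoteSysCpu) / (RequestCpus * (ServerTime - JobStartDate)). For example, a 4-core job that has run for 1000 s and accumulated 3200 s of combined user and system CPU time scores 3200 / (4 * 1000) = 0.8.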
jobs = []
for schedd_ad in htcondor.Collector().locateAll(htcondor.DaemonTypes.Schedd):
    schedd = htcondor.Schedd(schedd_ad)
    jobs += schedd.xquery()  # xquery yields the matching jobs
print(len(jobs))

# Build one query per schedd so they can run in parallel.
queries = []
for schedd_ad in htcondor.Collector().locateAll(htcondor.DaemonTypes.Schedd):
    schedd_obj = htcondor.Schedd(schedd_ad)
    queries.append(schedd_obj.xquery())

# Collect results as they become available; the tag defaults to the schedd's name.
job_counts = {}
for query in htcondor.poll(queries):
    schedd_name = query.tag()
    job_counts.setdefault(schedd_name, 0)
    count = len(query.nextAdsNonBlocking())
    job_counts[schedd_name] += count
    print("Got %d results from %s." % (count, schedd_name))
print(job_counts)

# History query ('true' matches every job; the last argument caps the result at 2 ads).
# suspend() suspends jobs.
for ad in schedd.history('true', ['ProcId', 'ClusterId', 'JobStatus', 'WallDuration'], 2):
    print(ad)