Example #1
0
 def testScheddQueryPoll(self):
     """Submit a cluster and poll two concurrent schedd queries for it.

     The submit ad is read from ``tests/submit.ad`` and submitted with a
     count of 10.  Up to 60 rounds are run; each round starts two named
     queries for the new cluster, drains them through ``htcondor.poll``
     and checks that every result handle carries one of the two tags.
     The loop stops as soon as a round returns no ads at all.
     """
     self.launch_daemons(["SCHEDD", "COLLECTOR", "STARTD", "NEGOTIATOR"])
     # Remove a stale output file left behind by a previous run.
     output_file = os.path.join(testdir, "test.out")
     if os.path.exists(output_file):
         os.unlink(output_file)
     schedd = htcondor.Schedd()
     # Use a context manager so the submit file handle is not leaked.
     with open("tests/submit.ad") as submit_file:
         ad = classad.parseOne(submit_file)
     ads = []
     cluster = schedd.submit(ad, 10, False, ads)
     for i in range(60):
         ads_iter = schedd.xquery("ClusterId == %d" % cluster,
                                  ["JobStatus"],
                                  name="query1")
         ads_iter2 = schedd.xquery("ClusterId == %d" % cluster,
                                   ["JobStatus"],
                                   name="query2")
         ads = []
         for query in htcondor.poll([ads_iter, ads_iter2]):
             self.assertTrue(query.tag() in ["query1", "query2"])
             ads += query.nextAdsNonBlocking()
         # No ads from either query: nothing matches the cluster any more.
         if len(ads) == 0:
             break
         # Nudge the schedd every other round.
         if i % 2 == 0:
             schedd.reschedule()
Example #2
0
    def q(self, constraint=True):
        # type: (Union[bool, str]) -> defaultdict
        """Query all schedds and collect CPU request and status per job.

        Similar in spirit to the ``condor_q`` command line tool.

        :param constraint: extra expression that is AND-ed with the
            instance's base requirement string.
        :return: defaultdict mapping ``"<ClusterId>.<ProcId>"`` to a list
            holding ``RequestCpus`` followed by ``JobStatus`` (both as int).
        """
        # Start one query per schedd; results are gathered further down.
        queries = []
        for schedd in self.schedds:
            requirements = "%s && %s" % (self.__q_requirement_string, constraint)
            queries.append(
                schedd.xquery(requirements=requirements,
                              projection=["ClusterId", "ProcId", "RequestCpus", "JobStatus"]))

        jobs = defaultdict(list)
        for query in htcondor.poll(queries):
            for job_ad in query.nextAdsNonBlocking():
                job_key = "%s.%s" % (job_ad.get("ClusterId"), job_ad.get("ProcId"))
                entry = jobs[job_key]
                entry.append(int(job_ad.get("RequestCpus")))
                entry.append(int(job_ad.get("JobStatus")))

        return jobs
Example #3
0
    def q(self, constraint=True):
        # type: (Union[bool, str]) -> defaultdict
        """Collect requested CPUs and job status from every known schedd.

        Comparable to running ``condor_q`` on the command line.

        :param constraint: additional expression combined via ``&&`` with
            the instance's base requirement string.
        :return: defaultdict keyed by ``"<ClusterId>.<ProcId>"``; each value
            is a list with ``RequestCpus`` and then ``JobStatus`` as ints.
        """
        projection_fields = ("ClusterId", "ProcId", "RequestCpus", "JobStatus")

        # One outstanding query per schedd, each with its own projection list.
        pending = []
        for schedd in self.schedds:
            expression = "%s && %s" % (self.__q_requirement_string, constraint)
            handle = schedd.xquery(requirements=expression,
                                   projection=list(projection_fields))
            pending.append(handle)

        result = defaultdict(list)
        for ready in htcondor.poll(pending):
            for ad in ready.nextAdsNonBlocking():
                name = "%s.%s" % (ad.get("ClusterId"), ad.get("ProcId"))
                values = result[name]
                values.append(int(ad.get("RequestCpus")))
                values.append(int(ad.get("JobStatus")))

        return result
Example #4
0
 def testScheddQueryPoll(self):
     """Exercise ``htcondor.poll`` with two named queries on one cluster.

     Submits the ad from ``tests/submit.ad`` with a count of 10, then for
     at most 60 rounds issues two tagged queries for that cluster, reads
     whatever ads are ready and asserts that each handle reports one of
     the expected tags.  The test finishes once a round yields no ads.
     """
     self.launch_daemons(["SCHEDD", "COLLECTOR", "STARTD", "NEGOTIATOR"])
     # Start from a clean slate: drop output from an earlier run.
     output_file = os.path.join(testdir, "test.out")
     if os.path.exists(output_file):
         os.unlink(output_file)
     schedd = htcondor.Schedd()
     # Close the submit file deterministically rather than leaking it.
     with open("tests/submit.ad") as submit_file:
         ad = classad.parseOne(submit_file)
     ads = []
     cluster = schedd.submit(ad, 10, False, ads)
     for i in range(60):
         ads_iter = schedd.xquery("ClusterId == %d" % cluster, ["JobStatus"], name="query1")
         ads_iter2 = schedd.xquery("ClusterId == %d" % cluster, ["JobStatus"], name="query2")
         ads = []
         for query in htcondor.poll([ads_iter, ads_iter2]):
             self.assertTrue(query.tag() in ["query1", "query2"])
             ads += query.nextAdsNonBlocking()
         # Both queries came back empty, so stop polling.
         if len(ads) == 0:
             break
         # Ask the schedd to reschedule on every second round.
         if i % 2 == 0:
             schedd.reschedule()
Example #5
0
    def extractData(self):
        """Gather HTCondor job/user statistics and compute an overall status.

        Side effects: updates ``self.priorities``,
        ``self.condor_jobs_information`` and ``self.user_statistics`` and
        appends one per-user dict to ``self.statistics_db_value_list``.

        Returns:
            dict: main-table values with the keys ``running``, ``idle``,
            ``cores``, ``ram``, ``requested_memory``, ``efficiency``,
            ``qtime`` (string), ``remote``, ``filename_plot`` and ``status``.
        """
        # Initialize the data for the main table
        data = {
            'running': 0,
            'idle': 0,
            'cores': 0,
            'ram': 0,
            'requested_memory': 0,
            'efficiency': 0,
            'qtime': [],  # per-job queue times; replaced by a string below
            'remote': 0,
            'filename_plot': ''
        }

        # Retrieve user priority information, keeping the largest value seen
        # per user (500.0 is used as the starting value for unknown users).
        for ad in self.negotiator.getPriorities():
            name = ad.get("Name").replace(ad.get("AccountingGroup") + ".", "")
            last_prio = self.priorities.setdefault(name, 500.0)
            self.priorities[name] = max(ad.get("Priority"), last_prio)

        # Extract job information using htcondor python bindings
        for query in htcondor.poll(self.queries):
            for ads in query:
                job_id = ads.get("GlobalJobId")
                self.condor_jobs_information[job_id] = {
                    quantity: ads.get(quantity)
                    for quantity in self.quantities_list
                }

        # Fill the main table and the user statistics information
        for _job_id, job in self.condor_jobs_information.iteritems():
            # Count remotable jobs
            if job["RemoteJob"]:
                data["remote"] += 1
            # Determine user and set up user dependent statistics
            user = job["User"]
            if user not in self.user_statistics:
                self.user_statistics[user] = copy.deepcopy(
                    self.user_statistics_dict)
            stats = self.user_statistics[user]
            # Count used RAM in MiB
            if job["ResidentSetSize"] is not None:
                stats["ram"] += job["ResidentSetSize"] / 1024.
                data["ram"] += job["ResidentSetSize"] / 1024.
            # Count requested RAM in MiB
            stats["requested_memory"] += job["RequestMemory"]
            data["requested_memory"] += job["RequestMemory"]
            # Count used cores
            stats["cores"] += job["RequestCpus"]
            data["cores"] += job["RequestCpus"]

            # Get information on network traffic.
            if job["NetworkInputMb"] is not None:
                stats["NetworkInputMb"] += job["NetworkInputMb"]
            if job["NetworkOutputMb"] is not None:
                stats["NetworkOutputMb"] += job["NetworkOutputMb"]
            # Get information on input files.
            stats["transfer"] += job["TransferInputSizeMB"]
            # Summarize the status information
            status = self.jobs_status_dict.get(job["JobStatus"])
            stats[status] += 1
            if status in data:
                data[status] += 1
            # Calculate the time in the queue for all jobs in seconds
            if status == "running":
                try:
                    data["qtime"].append(
                        max(0, job["JobStartDate"] - job["QDate"]))
                except Exception:
                    # Best effort: skip jobs whose dates cannot be subtracted.
                    pass
            # Determine the sites the user is running his jobs on
            if job["MachineAttrCloudSite0"] is None:
                job["MachineAttrCloudSite0"] = "Undefined"
            site = job["MachineAttrCloudSite0"].lower()
            if status == "running" and site not in stats["sites"]:
                stats["sites"].append(site)
            # Calculate runtimes, cputimes and efficiencies of each job of a user
            if status == "running":
                try:
                    cputime = job["RemoteUserCpu"] + job["RemoteSysCpu"]
                    runtime = job["RequestCpus"] * (job["ServerTime"] -
                                                    job["JobStartDate"])
                    efficiency = float(cputime) / float(runtime)
                    # Outdated start dates can give efficiencies above 1;
                    # such samples are dropped.
                    if efficiency <= 1.:
                        stats["efficiencies"].append(efficiency)
                except Exception:
                    # Best effort: missing values or a zero runtime simply
                    # yield no efficiency sample for this job.
                    pass

        all_efficiencies = []
        for user in self.user_statistics:
            stats = self.user_statistics[user]
            user_data = {"batchsystem_user": user}
            for status in self.jobs_status_dict.itervalues():
                user_data[status] = stats[status]
            user_data["cores"] = stats["cores"]
            user_data["ram"] = self.determine_diskspace(
                stats["ram"], given_unit="MiB")
            user_data["requested_memory"] = max(
                1,
                self.determine_diskspace(stats["requested_memory"],
                                         given_unit="MiB"))
            # Users without any efficiency sample are reported as 1.0.
            if len(stats["efficiencies"]) > 0:
                user_data["efficiency"] = round(
                    np.mean(stats["efficiencies"]), 2)
            else:
                user_data["efficiency"] = 1.0
            all_efficiencies += stats["efficiencies"]
            user_data["sites"] = ",\n".join(stats["sites"])
            user_data["priority"] = round(self.priorities[user], 1)
            user_data["NetworkInputMb"] = round(stats["NetworkInputMb"], 2)
            user_data["NetworkOutputMb"] = round(stats["NetworkOutputMb"], 2)
            # Report the accumulated input transfer size; this used to copy
            # the "NetworkOutputMb" value by mistake.
            user_data["transfer"] = stats["transfer"]
            self.statistics_db_value_list.append(user_data)

        if all_efficiencies:
            data["efficiency"] = round(np.mean(all_efficiencies), 2)
        else:
            data["efficiency"] = 1.0
        data["ram"] = self.determine_diskspace(data["ram"], given_unit="MiB")
        data["requested_memory"] = max(
            1,
            self.determine_diskspace(data["requested_memory"],
                                     given_unit="MiB"))
        if data["qtime"]:
            data["qtime"] = str(timedelta(seconds=int(np.mean(data["qtime"]))))
        else:
            data["qtime"] = str(0)

        # Plot creation for user statistics
        data["filename_plot"] = self.plot()

        # Overall status calculation
        efficiency_status = 0.0
        if data["efficiency"] >= float(self.config["efficiency_warning"]):
            efficiency_status = 1.0
        elif data["efficiency"] >= float(self.config["efficiency_critical"]):
            efficiency_status = 0.5
        # Without any cores there is no RAM-per-core ratio to judge; treat
        # that case as OK instead of dividing by zero.
        ram_status = 1.0
        if data["cores"] != 0:
            ram_per_core = float(data["ram"]) / float(data["cores"])
            if ram_per_core > float(self.config["ram_requested_memory_ratio"]):
                ram_status = 0.5
        # str(timedelta) only contains "day" for durations of a day or more.
        queue_time_status = 0.0 if "day" in data["qtime"] else 1.0
        data["status"] = min(efficiency_status, ram_status, queue_time_status)
        return data
	def extractData(self):
		"""Gather HTCondor job/user statistics and compute an overall status.

		Side effects: updates ``self.priorities``,
		``self.condor_jobs_information`` and ``self.user_statistics`` and
		appends one per-user dict to ``self.statistics_db_value_list``.

		Returns:
			dict: main-table values with the keys ``running``, ``idle``,
			``cores``, ``ram``, ``requested_memory``, ``efficiency``,
			``qtime`` (string), ``remote``, ``filename_plot`` and ``status``.
		"""
		# Initialize the data for the main table
		data = {
			'running': 0,
			'idle': 0,
			'cores': 0,
			'ram': 0,
			'requested_memory': 0,
			'efficiency': 0,
			'qtime': [],  # per-job queue times; replaced by a string below
			'remote': 0,
			'filename_plot': ''
		}

		# Retrieve user priority information, keeping the largest value seen
		# per user (500.0 is used as the starting value for unknown users).
		for ad in self.negotiator.getPriorities():
			name = ad.get("Name").replace(ad.get("AccountingGroup") + ".", "")
			last_prio = self.priorities.setdefault(name, 500.0)
			self.priorities[name] = max(ad.get("Priority"), last_prio)

		# Extract job information using htcondor python bindings
		for query in htcondor.poll(self.queries):
			for ads in query:
				job_id = ads.get("GlobalJobId")
				self.condor_jobs_information[job_id] = {quantity: ads.get(quantity) for quantity in self.quantities_list}

		# Fill the main table and the user statistics information
		for _job_id, job in self.condor_jobs_information.iteritems():
			# Count remotable jobs
			if job["RemoteJob"]:
				data["remote"] += 1
			# Determine user and set up user dependent statistics
			user = job["User"]
			if user not in self.user_statistics:
				self.user_statistics[user] = copy.deepcopy(self.user_statistics_dict)
			stats = self.user_statistics[user]
			# Count used RAM in MiB
			if job["ResidentSetSize"] is not None:
				stats["ram"] += job["ResidentSetSize"] / 1024.
				data["ram"] += job["ResidentSetSize"] / 1024.
			# Count requested RAM in MiB
			stats["requested_memory"] += job["RequestMemory"]
			data["requested_memory"] += job["RequestMemory"]
			# Count used cores
			stats["cores"] += job["RequestCpus"]
			data["cores"] += job["RequestCpus"]

			# Get information on network traffic.
			if job["NetworkInputMb"] is not None:
				stats["NetworkInputMb"] += job["NetworkInputMb"]
			if job["NetworkOutputMb"] is not None:
				stats["NetworkOutputMb"] += job["NetworkOutputMb"]
			# Get information on input files.
			stats["transfer"] += job["TransferInputSizeMB"]
			# Summarize the status information
			status = self.jobs_status_dict.get(job["JobStatus"])
			stats[status] += 1
			if status in data:
				data[status] += 1
			# Calculate the time in the queue for all jobs in seconds
			if status == "running":
				try:
					data["qtime"].append(max(0, job["JobStartDate"] - job["QDate"]))
				except Exception:
					# Best effort: skip jobs whose dates cannot be subtracted.
					pass
			# Determine the sites the user is running his jobs on
			if job["MachineAttrCloudSite0"] is None:
				job["MachineAttrCloudSite0"] = "Undefined"
			site = job["MachineAttrCloudSite0"].lower()
			if status == "running" and site not in stats["sites"]:
				stats["sites"].append(site)
			# Calculate runtimes, cputimes and efficiencies of each job of a user
			if status == "running":
				try:
					cputime = job["RemoteUserCpu"] + job["RemoteSysCpu"]
					runtime = job["RequestCpus"] * (job["ServerTime"] - job["JobStartDate"])
					efficiency = float(cputime) / float(runtime)
					# Outdated start dates can give efficiencies above 1;
					# such samples are dropped.
					if efficiency <= 1.:
						stats["efficiencies"].append(efficiency)
				except Exception:
					# Best effort: missing values or a zero runtime simply
					# yield no efficiency sample for this job.
					pass

		all_efficiencies = []
		for user in self.user_statistics:
			stats = self.user_statistics[user]
			user_data = {"batchsystem_user": user}
			for status in self.jobs_status_dict.itervalues():
				user_data[status] = stats[status]
			user_data["cores"] = stats["cores"]
			user_data["ram"] = self.determine_diskspace(stats["ram"], given_unit="MiB")
			user_data["requested_memory"] = max(1, self.determine_diskspace(stats["requested_memory"], given_unit="MiB"))
			# Users without any efficiency sample are reported as 1.0.
			if len(stats["efficiencies"]) > 0:
				user_data["efficiency"] = round(np.mean(stats["efficiencies"]), 2)
			else:
				user_data["efficiency"] = 1.0
			all_efficiencies += stats["efficiencies"]
			user_data["sites"] = ",\n".join(stats["sites"])
			user_data["priority"] = round(self.priorities[user], 1)
			user_data["NetworkInputMb"] = round(stats["NetworkInputMb"], 2)
			user_data["NetworkOutputMb"] = round(stats["NetworkOutputMb"], 2)
			# Report the accumulated input transfer size; this used to copy
			# the "NetworkOutputMb" value by mistake.
			user_data["transfer"] = stats["transfer"]
			self.statistics_db_value_list.append(user_data)

		if all_efficiencies:
			data["efficiency"] = round(np.mean(all_efficiencies), 2)
		else:
			data["efficiency"] = 1.0
		data["ram"] = self.determine_diskspace(data["ram"], given_unit="MiB")
		data["requested_memory"] = max(1, self.determine_diskspace(data["requested_memory"], given_unit="MiB"))
		if data["qtime"]:
			data["qtime"] = str(timedelta(seconds=int(np.mean(data["qtime"]))))
		else:
			data["qtime"] = str(0)

		# Plot creation for user statistics
		data["filename_plot"] = self.plot()

		# Overall status calculation
		efficiency_status = 0.0
		if data["efficiency"] >= float(self.config["efficiency_warning"]):
			efficiency_status = 1.0
		elif data["efficiency"] >= float(self.config["efficiency_critical"]):
			efficiency_status = 0.5
		# Without any cores there is no RAM-per-core ratio to judge; treat
		# that case as OK instead of dividing by zero.
		ram_status = 1.0
		if data["cores"] != 0:
			ram_per_core = float(data["ram"]) / float(data["cores"])
			if ram_per_core > float(self.config["ram_requested_memory_ratio"]):
				ram_status = 0.5
		# str(timedelta) only contains "day" for durations of a day or more.
		queue_time_status = 0.0 if "day" in data["qtime"] else 1.0
		data["status"] = min(efficiency_status, ram_status, queue_time_status)
		return data
Example #7
0
# Blocking variant: query each schedd in turn and collect all job ads.
jobs = []
for schedd_ad in htcondor.Collector().locateAll(htcondor.DaemonTypes.Schedd):
    schedd = htcondor.Schedd(schedd_ad)
    jobs += schedd.xquery()
print(len(jobs))

# Polling variant: start one query per schedd first, then read the results
# as they become available.
queries = []
# locateAll returns one ad per schedd; locate would return a single ad only.
coll_query = htcondor.Collector().locateAll(htcondor.DaemonTypes.Schedd)
for schedd_ad in coll_query:
    schedd_obj = htcondor.Schedd(schedd_ad)
    queries.append(schedd_obj.xquery())

# Collect the results that are available
job_counts = {}
for query in htcondor.poll(queries):
    schedd_name = query.tag()
    job_counts.setdefault(schedd_name, 0)
    count = len(query.nextAdsNonBlocking())
    job_counts[schedd_name] += count
    print("Got %d results from %s." % (count, schedd_name))

print(job_counts)

# History query
# Note: suspend() suspends jobs
# `schedd` is the last schedd from the first loop above; it is undefined
# when the collector returned no schedds.
for ad in schedd.history('true', ['ProcId', 'ClusterId', 'JobStatus', 'WallDuration'], 2):
    print(ad)