コード例 #1
0
ファイル: Infrastructure.py プロジェクト: supermanue/montera
	def createInfrastructureTasks(self, infrastructureTasks):
		"""Create the infrastructure tasks needed for the currently published hosts.

		The resource list returned by obtainGWResources() is scanned. Hosts that
		are not known yet are created, stored in self.hosts and added to the
		Session; hosts whose architecture or CPU speed changed are updated and
		added to the Session. Both kinds, plus known hosts for which
		shouldBeProfiled() is true, get one profiling task each. Profiling tasks
		whose hostname already appears in infrastructureTasks are dropped.

		When self.lrms is "jobmanager-pilot", wake-up tasks bound to a FakeHost
		are appended: base.maxRunningTasks minus the number of existing tasks
		with an empty hostname and a status other than "PENDING" (never less
		than zero).

		infrastructureTasks: existing tasks; each one is read through
			task.host.hostname and task.status.

		Returns the list of newly created tasks.
		"""

		def readTag(node, tagName, default, convert=None):
			#text of the first <tagName> element under node, optionally converted;
			#default when the element is missing, empty or cannot be converted
			try:
				text = node.getElementsByTagName(tagName)[0].firstChild.data
				if convert is None:
					return text
				return convert(text)
			except (IndexError, AttributeError, ValueError):
				return default

		print ("---------------------")
		print ("---------------------")
		print ("---------------------")

		print ("CREATE INFRASTRUCTURE TASKS")

		hostsToProfile = []

		for hostInfo in obtainGWResources():
			hostName = hostInfo.getElementsByTagName("HOSTNAME")[0].firstChild.data #TODO: remove "unicode" from TEXT

			foundArch = readTag(hostInfo, "ARCH", "")
			foundCpuMHz = readTag(hostInfo, "CPU_MHZ", 0, int)
			foundLrms = readTag(hostInfo, "LRMS_NAME", None)
			freeNodeCount = readTag(hostInfo, "FREENODECOUNT", 0, int)

			#only pilots with at least one free slot are taken into account
			if foundLrms == "jobmanager-pilot" and freeNodeCount <= 0:
				continue

			#if a certain LRMS is desired, remove the hosts with a different one
			if self.lrms is not None and foundLrms != self.lrms:
				continue

			currentHost = self.getHost(hostName)
			if currentHost is None:
				#host is unknown: register it and create a profiling task
				newHost = Host(hostName, arch=foundArch, cpuMHz = foundCpuMHz, lrms=foundLrms)
				self.hosts.append(newHost)
				hostsToProfile.append(newHost)
				#store new host on database (failure resistance)
				Session.add(newHost)
			elif (currentHost.arch != foundArch) or (currentHost.cpuMHz != foundCpuMHz):
				#information has changed: update host information
				#TODO: decide what to do here. Should the old host be removed, or only
				#overwritten? If it is removed, what happens to the tasks that were
				#executed on it? Not trivial.
				currentHost.arch = foundArch
				currentHost.cpuMHz = foundCpuMHz
				if currentHost.lrms is None:
					currentHost.lrms = foundLrms
				hostsToProfile.append(currentHost)
				Session.add(currentHost)
			elif currentHost.shouldBeProfiled():
				if currentHost.lrms is None:
					currentHost.lrms = foundLrms
				hostsToProfile.append(currentHost)

		#host profiling: submission of 1 task per host
		hostProfilingTasks = [ExecutionManager.createHostProfilingTask(host)
							for host in hostsToProfile]

		#keep only the profiling tasks whose host has no task in infrastructureTasks yet
		siteTasks = [task for task in hostProfilingTasks
					if not any(gridTask.host.hostname == task.host.hostname
							for gridTask in infrastructureTasks)]

		#This is for the first montera + gwpilot experiment:
		#we want to have pilots running, so they are started with these tasks
		if self.lrms=="jobmanager-pilot":
			print ("creating fake profiling tasks")

			existingFakeTasks = len([task for task in infrastructureTasks if task.host.hostname=="" and task.status != "PENDING"])
			existingGoodPilots = len (self.getGoodHosts())
			#never negative, so the logged number matches the number of tasks created
			fakeTasksToCreate = max(0, base.maxRunningTasks - existingFakeTasks)

			print ("	Desired tasks: " + str(base.maxRunningTasks))
			print ("	Existing fake tasks: " + str(existingFakeTasks))
			print ("	Existing good pilots: " + str(existingGoodPilots))
			print ("	created: " + str(fakeTasksToCreate))

			emptyHost = FakeHost()
			#NOTE(review): "createWakeUpask" looks like a typo of "createWakeUpTask";
			#kept as-is because ExecutionManager is defined elsewhere - confirm
			fakeHostProfilingTasks = [ExecutionManager.createWakeUpask(emptyHost)
						for i in range(fakeTasksToCreate)]

			siteTasks+=fakeHostProfilingTasks

		return siteTasks
コード例 #2
0
	def createInfrastructureTasks(self, infrastructureTasks):
		"""Register new hosts/pilots and create the fake profiling tasks.

		The resource list returned by obtainGWResources() is scanned. Resources
		without an LRMS name are skipped, as are "jobmanager-pilot" resources
		without free nodes. Unknown pilots are created (together with their
		PilotResource when none matches the published site and worker node) and
		stored in self.pilots; other unknown resources are created as Host and
		stored in self.hosts. Known hosts get their whetstones value updated
		when the published one differs.

		No host is added to the profiling list here, so the returned list only
		contains fake host profiling tasks bound to a FakeHost:
		base.maxRunningTasks minus the number of existing tasks with an empty
		hostname and a status other than "PENDING" (never less than zero).

		infrastructureTasks: existing tasks; each one is read through
			task.host.hostname and task.status.

		Returns the list of newly created tasks.
		"""

		def readTag(node, tagName, default, convert=None):
			#text of the first <tagName> element under node, optionally converted;
			#default when the element is missing, empty or cannot be converted
			try:
				text = node.getElementsByTagName(tagName)[0].firstChild.data
				if convert is None:
					return text
				return convert(text)
			except (IndexError, AttributeError, ValueError):
				return default

		print ("-------------------")
		print ("-------------------")

		print ("createInfrastructureTasks- NewPilotInfrastructure")

		hostList = obtainGWResources()

		hostsToProfile = []

		#name of the variable where pilots publish their whetstones; read once,
		#and left as None (no variable matches) when USER is not set
		username = os.getenv("USER")
		whetstonesVarName = None
		if username is not None:
			whetstonesVarName = "PILOT_" + username + "_VAR_5"

		print ("Analyzing resources ")
		for hostInfo in hostList:
			hostName = hostInfo.getElementsByTagName("HOSTNAME")[0].firstChild.data.strip().lower() #TODO: remove "unicode" from TEXT
			whetstones=0
			#reset for every host so that values of a previous pilot are never reused
			site = None
			workerNode = None

			foundArch = readTag(hostInfo, "ARCH", "")
			foundCpuMHz = readTag(hostInfo, "CPU_MHZ", 0, int)
			foundLrms = readTag(hostInfo, "LRMS_NAME", None, lambda text: text.strip().lower())
			if foundLrms is None:
				print ("Could not find LRMS for host " + hostName + ", skipping it")
				continue
			freeNodeCount = readTag(hostInfo, "FREENODECOUNT", 0, int)

			if foundLrms == "jobmanager-pilot":
				#only pilots with at least one free slot are taken into account
				if freeNodeCount <= 0:
					continue

				for node in hostInfo.getElementsByTagName("GENERIC_VAR_STR"):
					varName = node.attributes['NAME'].value
					if varName == "PILOT_REAL_HOSTNAME":
						workerNode = node.attributes['VALUE'].value.strip().lower()
					elif varName == "PILOT_REAL_RESOURCE":
						site = node.attributes['VALUE'].value.strip().lower()

				for node in hostInfo.getElementsByTagName("GENERIC_VAR_INT"):
					if node.attributes['NAME'].value == whetstonesVarName:
						whetstones = int(node.attributes['VALUE'].value.strip().lower())
						#values above 65534 are treated as "not initialized"
						if whetstones > 65534:
							whetstones = 0

			currentHost = self.getHost(hostName)
			if currentHost is None:
				print ("Host/Pilot  not found. hostname: " + hostName + ", LRMS: " + foundLrms)
				if foundLrms == "jobmanager-pilot":
					#a pilot was found:
					#first look for its resource, creating it if it does not exist,
					#then create a pilot that uses that resource
					if site is None or workerNode is None:
						#without both values the pilot cannot be tied to a resource
						print ("	Pilot " + hostName + " does not publish its site and worker node, skipping it")
						continue

					pilotResource = base.Session.query(PilotResource).filter(PilotResource.site == site, PilotResource.workerNode == workerNode).first()
					if pilotResource is None:
						print ("	PilotResource was not found, creating a new one")
						pilotResource = PilotResource(site, workerNode)
					print ("	Creating a new Pilot in NewPilotInfrastructure.createInfrastructureTasks")
					newHost = Pilot(hostName, arch=foundArch, cpuMHz = foundCpuMHz, pilotResource = pilotResource, whetstones = whetstones)
					self.pilots.append(newHost)
					Session.add(newHost)

				else:
					print ("	Creating a new Host in NewPilotInfrastructure.createInfrastructureTasks")
					newHost = Host(hostName, arch=foundArch, cpuMHz = foundCpuMHz, lrms=foundLrms)
					self.hosts.append(newHost)
					Session.add(newHost)

				#new hosts are not added to hostsToProfile: profiling would only be
				#needed for pilots that have not published their whetstones

			elif (currentHost.getWhetstones() != whetstones):
				#information has changed: update host information.
				#a setter is used because the update is a more complicated
				#operation, encapsulated in that function
				currentHost.setWhetstones(whetstones)
				Session.add(currentHost)
				print ("Host: " + hostName + " UPDATED, new whetstones=" + str(whetstones))

			elif currentHost.lrms is None:
				currentHost.lrms = foundLrms


		#profiling of new sites
		hostProfilingTasks = [ExecutionManager.createHostProfilingTask(host)
							for host in hostsToProfile
							for i in range(base.profilingTasksPerHost)]

		#we are assuming that every pilot publishes the variable with its
		#performance, so nothing has to be profiled.

		#now, in this new approach, we want a few benchmarks to start up pilots
		print ("creating fake profiling tasks")

		existingFakeTasks = len([task for task in infrastructureTasks if task.host.hostname=="" and task.status != "PENDING"])
		existingGoodPilots = len (self.getGoodHosts())
		#never negative, so the logged number matches the number of tasks created
		fakeTasksToCreate = max(0, base.maxRunningTasks - existingFakeTasks)

		print ("	Desired tasks: " + str(base.maxRunningTasks))
		print ("	Existing fake tasks: " + str(existingFakeTasks))
		print ("	Existing good pilots: " + str(existingGoodPilots))
		print ("	created: " + str(fakeTasksToCreate))

		emptyHost = FakeHost()
		fakeHostProfilingTasks = [ExecutionManager.createFakeHostProfilingTask(emptyHost)
					for i in range(fakeTasksToCreate)]

		hostProfilingTasks+=fakeHostProfilingTasks

		return hostProfilingTasks