Esempio n. 1
0
	def next_work_unit(self, node):
		"""Return the queued work unit belonging to the most urgent job.

		Queued units are grouped by job id. Each job is ranked by its
		latest feasible start time: the job deadline minus the job wall
		time in seconds, both read from the job's first queued unit. The
		first queued unit of the job with the smallest value is returned;
		when several jobs tie, the one encountered first wins.

		``node`` is accepted but not consulted by this method.

		Returns None when nothing is queued.
		"""
		units_by_job = defaultdict(list)
		for queued in self.grid.get_queued():
			units_by_job[queued.job.job_id].append(queued)

		if not units_by_job:
			return None

		def latest_start(units):
			# Unlike FCFS, every job has to be inspected before the most
			# urgent one is known.
			head_job = units[0].job
			return head_job.deadline - walltime.wall_secs(head_job.wall_time)

		# min() keeps the first of several equal minima, matching a scan
		# that only replaces the current best on a strictly smaller value.
		return min(units_by_job.values(), key=latest_start)[0]
Esempio n. 2
0
	def allocate_work_units(self):
		"""Hand queued work units to the free cores of every node queue.

		Runs entirely under ``self.grid.queue_lock``. Returns early when
		nothing is queued. Otherwise, for each queue in
		``self.grid.node_queue`` and each free node of that queue:

		1. queued work units whose wall time, if started now, would run
		   past their job's deadline are killed;
		2. ``self.next_work_unit(node, queue)`` is asked for a unit once
		   per free core, and the unit is passed to
		   ``self.allocate_work_unit``.

		If ``next_work_unit`` raises, the traceback is written to the log
		and the process is terminated with ``os._exit(1)``. If allocation
		raises ``NodeUnavailableException`` the node is marked "DEAD" and
		no further units are offered to it during this pass.
		"""
		with self.grid.queue_lock:
			# Check that there are jobs to schedule
			if len(self.grid.get_queued()) == 0:
				self.write_to_log("Waiting for tasks to schedule.\n")
				return

			# Write the job queue to the log
			self.write_queue_to_log()

			for queue in self.grid.node_queue.keys():
				free_nodes = False
				for node in self.grid.get_free_node(queue):
					free_nodes = True

					# Kill any work units which have no chance of finishing
					# before the deadline. A snapshot is iterated because
					# kill() may alter the queue being walked (TODO confirm).
					for unit in list(self.grid.get_queued()):
						if (int(time.time()) + walltime.wall_secs(unit.job.wall_time)) > unit.job.deadline:
							unit.kill_msg = "Killed by scheduler: Unable to complete work_unit by deadline."
							unit.kill()

					# Want to allocate on all free cores on the node
					for free_core in range(0, (node['cores'] - len(node['work_units']))):

						# Get the next work unit to allocate
						try:
							unit = self.next_work_unit(node, queue)
						except Exception:
							# A failing scheduling policy is treated as fatal:
							# record the traceback, then stop the whole server.
							self.write_to_log("Work unit allocator crashed\n")
							self.log.write(traceback.format_exc())
							self.log.close()
							# Single-argument print(...) behaves the same under
							# the Python 2 print statement and Python 3.
							print("Error in Scheduler. Shutting down Server.")
							os._exit(1)

						# No work units to allocate for this queue, continue
						if unit is None:
							continue

						# Output to log file
						self.write_to_log("Allocating work unit " +
									   str(unit.work_unit_id) + " of job " +
									   str(unit.job.job_id) + " on node " +
									   str(node['node_id']) + ".\n\n")

						# If allocating the work unit has failed, mark the
						# node dead and stop offering it work: the remaining
						# free cores belong to the same unreachable node.
						try:
							self.allocate_work_unit(node, unit)
						except NodeUnavailableException:
							self.write_to_log("Failed to allocated job!\n")
							self.grid.nodes[ node['node_id'] ]['status'] = "DEAD"
							break

				# Find a cleaner way to do this!
				if not free_nodes:
					self.write_to_log("Waiting for free nodes of type %s." % queue)
Esempio n. 3
0
	def monitor_tasks(self):
		"""Reap finished tasks and kill those over their wall time or deadline.

		Walks a snapshot of ``self.tasks`` so entries can be deleted
		during the pass. Each task is handled by the first rule that
		matches:

		* finished: ``self.finish_task(task)`` is called and the task is
		  removed from ``self.tasks``;
		* running longer than its wall time: ``self.kill_task`` is called;
		* past its deadline: ``self.kill_task`` is called.

		Killed tasks are not removed from ``self.tasks`` by this method.
		"""
		if len(self.tasks) != 0:
			# Dump the current task table to stdout. Single-argument
			# print(...) behaves the same under Python 2 and Python 3.
			print(self.tasks)

		for i, task in list(self.tasks.items()):
			# Check if a task has finished
			if task.has_finished():
				self.finish_task(task)
				del self.tasks[i]

			# Kill task if it has exceeded its wall time
			elif (int(time.time()) - task.running_ts) > walltime.wall_secs(task.wall_time):
				self.kill_task(task, "Exceeded Wall time.")
				# The message names the work unit first and the job second,
				# so the ids are supplied in that order.
				print("Work unit %s of job %s killed: Exceeded Wall Time." % (task.work_unit_id, task.job_id))

			# Kill task if it exceeds its deadline (for fairness)
			elif int(time.time()) > task.deadline:
				self.kill_task(task, "Exceeded deadline.")
				print("Work unit %s of job %s killed: Exceeded Deadline." % (task.work_unit_id, task.job_id))
Esempio n. 4
0
	def next_deadline_work_unit(self, node):
		"""Pick the most urgent affordable work unit for ``node``.

		Only queued units whose job type equals ``node['type']`` are
		considered, grouped by job id. A job is eligible when its
		``budget_per_node_hour`` is at least ``node['cost']``. Eligible
		jobs are ranked by deadline minus wall time in seconds (read
		from the job's first queued unit); the smallest value wins, and
		on an exact tie the job with the strictly higher budget wins.

		Returns the first queued unit of the winning job, or None when
		no job of this type is queued or none can afford the node.
		"""
		candidates = defaultdict(list)
		for queued in self.grid.get_queued():
			# Only jobs of the node's type belong in this queue.
			if queued.job.job_type == node['type']:
				candidates[queued.job.job_id].append(queued)

		# No jobs to schedule of this type!
		if not candidates:
			return None

		# The node's cost comes from the node description.
		hourly_cost = node['cost']
		best_slack = None
		chosen = None

		for units in candidates.values():
			head = units[0]

			# Skip jobs whose budget does not cover this node.
			if not (head.job.budget_per_node_hour >= hourly_cost):
				continue

			slack = int(head.job.deadline) - walltime.wall_secs(head.job.wall_time)

			# First eligible job, or a strictly more urgent one.
			if best_slack is None or slack < best_slack:
				best_slack = slack
				chosen = head

			# Same urgency: prefer the job paying more per node hour.
			elif slack == best_slack and head.job.budget_per_node_hour > chosen.job.budget_per_node_hour:
				chosen = head

		return chosen
Esempio n. 5
0
    def monitor_tasks(self):
        """Reap finished tasks and kill those over their wall time or deadline.

        Walks a snapshot of ``self.tasks`` so entries can be deleted
        during the pass. Each task is handled by the first rule that
        matches:

        * finished: ``self.finish_task(task)`` is called and the task is
          removed from ``self.tasks``;
        * running longer than its wall time: ``self.kill_task`` is called;
        * past its deadline: ``self.kill_task`` is called.

        Killed tasks are not removed from ``self.tasks`` by this method.
        """
        if len(self.tasks) != 0:
            # Dump the current task table to stdout. Single-argument
            # print(...) behaves the same under Python 2 and Python 3.
            print(self.tasks)

        for i, task in list(self.tasks.items()):
            # Check if a task has finished
            if task.has_finished():
                self.finish_task(task)
                del self.tasks[i]

            # Kill task if it has exceeded its wall time
            elif (int(time.time()) - task.running_ts) > walltime.wall_secs(
                    task.wall_time):
                self.kill_task(task, "Exceeded Wall time.")
                # The message names the work unit first and the job second,
                # so the ids are supplied in that order.
                print("Work unit %s of job %s killed: Exceeded Wall Time." % (
                    task.work_unit_id, task.job_id))

            # Kill task if it exceeds its deadline (for fairness)
            elif int(time.time()) > task.deadline:
                self.kill_task(task, "Exceeded deadline.")
                print("Work unit %s of job %s killed: Exceeded Deadline." % (
                    task.work_unit_id, task.job_id))
Esempio n. 6
0
    def add_job(self, flags, wall_time, deadline, budget, job_type, name):
        """Validate a job submission and register it in ``self.jobs``.

        Args:
            flags: passed through unchanged to ``Job``.
            wall_time: wall time string parsed by ``walltime.strptime``
                (the error text gives the format as DD:HH:MM:SS).
            deadline: string in "YYYY-MM-DD HH:MM:SS" form; converted to
                seconds since the epoch with ``time.mktime`` (local time).
            budget: amount in cents; anything ``int()`` accepts.
            job_type: a key of ``self.node_queue``, or None to use
                "DEFAULT".
            name: passed through unchanged to ``Job``.

        Returns:
            The newly created ``Job``, stored under ``self.next_job_id``
            before that counter is incremented.

        Raises:
            InvalidJobTypeException: unknown job type, or wall time
                longer than the limit configured for that queue.
            InvalidJobBudgetException: budget is not an integer or is
                negative.
            InvalidWallTimeFormatException: wall time cannot be parsed.
            InvalidJobDeadlineFormatException: deadline cannot be parsed
                (wrong format, wrong type, or not representable).
            InvalidJobDeadlineException: deadline is in the past, or too
                close for the wall time to fit before it.
        """
        # Need to check job_type is a valid queue
        if job_type is None:
            job_type = "DEFAULT"
        elif job_type not in self.node_queue.keys():
            raise InvalidJobTypeException(
                "Invalid Job Type specified: %s. Valid job types are: %s." %
                (job_type, ", ".join(self.node_queue.keys())))

        # Check for Valid budget
        try:
            budget = int(budget)
        except (TypeError, ValueError):
            # The 1-tuple keeps the formatting safe when the rejected
            # value is itself a tuple.
            raise InvalidJobBudgetException(
                "Invalid Budget specified: %s. Format: amount in cents as a whole number."
                % (budget,))
        # NOTE(review): the message says "greater than 0" but a budget of
        # exactly 0 is accepted - confirm which one is intended.
        if budget < 0:
            raise InvalidJobBudgetException(
                "Invalid Budget specified: %s. Budget must be greater than 0" %
                budget)

        # Check that wall_time is valid:
        try:
            wall_stripped = walltime.strptime(wall_time)
        except WallTimeFormatException:
            raise InvalidWallTimeFormatException(
                "Invalid Wall Time specified: %s. Format: DD:HH:MM:SS." %
                wall_time)
        wall_seconds = walltime.wall_secs(wall_stripped)

        # Check that deadline format is valid. strptime raises TypeError
        # for a non-string (e.g. None) and mktime may raise OverflowError
        # for an unrepresentable date; both are reported as format errors
        # instead of leaking out as raw built-in exceptions.
        try:
            deadline_since_epoch = time.mktime(
                time.strptime(deadline, "%Y-%m-%d %H:%M:%S"))
        except (TypeError, ValueError, OverflowError):
            raise InvalidJobDeadlineFormatException(
                "Invalid Deadline specified: %s. Format: YYYY-MM-DD HH:MM:SS" %
                (deadline,))

        # Check that deadline is valid
        if deadline_since_epoch <= int(time.time()):
            raise InvalidJobDeadlineException(
                "Invalid Deadline specified: %s. Deadline specified is in the past."
                % deadline)

        # Check that deadline is reasonable: the job must be able to run
        # for its full wall time and still finish before the deadline.
        if (deadline_since_epoch - wall_seconds) < int(time.time()):
            raise InvalidJobDeadlineException(
                "Error: Current time plus wall time is later than the specified deadline. Please adjust either and resubmit."
            )

        # Check that wall time is within acceptable range for job queue
        # placement; a limit of None means the queue has no limit.
        queue_limit = self.node_queue[job_type][1]
        if queue_limit is not None and wall_seconds > walltime.wall_secs(
                queue_limit):
            raise InvalidJobTypeException(
                "Invalid Job Type specified: %s. Wall time %s is too large. Wall time must be shorter than %s for job type %s."
                % (job_type, walltime.strftime(wall_stripped), queue_limit,
                   job_type))

        #
        # All tests passed, add to grid.
        #

        job = Job(job_id=self.next_job_id,
                  flags=flags,
                  wall_time=wall_stripped,
                  deadline=deadline_since_epoch,
                  budget=budget,
                  job_type=job_type,
                  name=name)

        self.jobs[self.next_job_id] = job
        self.next_job_id += 1

        return job
Esempio n. 7
0
	def add_job(self, flags, wall_time, deadline, budget, job_type, name):
		"""Validate a job submission and register it in ``self.jobs``.

		Args:
			flags: passed through unchanged to ``Job``.
			wall_time: wall time string parsed by ``walltime.strptime``
				(the error text gives the format as DD:HH:MM:SS).
			deadline: string in "YYYY-MM-DD HH:MM:SS" form; converted to
				seconds since the epoch with ``time.mktime`` (local time).
			budget: amount in cents; anything ``int()`` accepts.
			job_type: a key of ``self.node_queue``, or None to use
				"DEFAULT".
			name: passed through unchanged to ``Job``.

		Returns:
			The newly created ``Job``, stored under ``self.next_job_id``
			before that counter is incremented.

		Raises:
			InvalidJobTypeException: unknown job type, or wall time
				longer than the limit configured for that queue.
			InvalidJobBudgetException: budget is not an integer or is
				negative.
			InvalidWallTimeFormatException: wall time cannot be parsed.
			InvalidJobDeadlineFormatException: deadline cannot be parsed
				(wrong format, wrong type, or not representable).
			InvalidJobDeadlineException: deadline is in the past, or too
				close for the wall time to fit before it.
		"""
		# Need to check job_type is a valid queue
		if job_type is None:
			job_type = "DEFAULT"
		elif job_type not in self.node_queue.keys():
			raise InvalidJobTypeException(
				"Invalid Job Type specified: %s. Valid job types are: %s." % (job_type, ", ".join(self.node_queue.keys()))
				)

		# Check for Valid budget
		try:
			budget = int(budget)
		except (TypeError, ValueError):
			# The 1-tuple keeps the formatting safe when the rejected
			# value is itself a tuple.
			raise InvalidJobBudgetException("Invalid Budget specified: %s. Format: amount in cents as a whole number." % (budget,))
		# NOTE(review): the message says "greater than 0" but a budget of
		# exactly 0 is accepted - confirm which one is intended.
		if budget < 0:
			raise InvalidJobBudgetException("Invalid Budget specified: %s. Budget must be greater than 0" % budget)

		# Check that wall_time is valid:
		try:
			wall_stripped = walltime.strptime(wall_time)
		except WallTimeFormatException:
			raise InvalidWallTimeFormatException("Invalid Wall Time specified: %s. Format: DD:HH:MM:SS." % wall_time)
		wall_seconds = walltime.wall_secs(wall_stripped)

		# Check that deadline format is valid. strptime raises TypeError
		# for a non-string (e.g. None) and mktime may raise OverflowError
		# for an unrepresentable date; both are reported as format errors
		# instead of leaking out as raw built-in exceptions.
		try:
			deadline_since_epoch = time.mktime(time.strptime(deadline, "%Y-%m-%d %H:%M:%S"))
		except (TypeError, ValueError, OverflowError):
			raise InvalidJobDeadlineFormatException("Invalid Deadline specified: %s. Format: YYYY-MM-DD HH:MM:SS" % (deadline,))

		# Check that deadline is valid
		if deadline_since_epoch <= int(time.time()):
			raise InvalidJobDeadlineException("Invalid Deadline specified: %s. Deadline specified is in the past." % deadline)

		# Check that deadline is reasonable: the job must be able to run
		# for its full wall time and still finish before the deadline.
		if (deadline_since_epoch - wall_seconds) < int(time.time()):
			raise InvalidJobDeadlineException(
				"Error: Current time plus wall time is later than the specified deadline. Please adjust either and resubmit."
				)

		# Check that wall time is within acceptable range for job queue
		# placement; a limit of None means the queue has no limit.
		queue_limit = self.node_queue[job_type][1]
		if queue_limit is not None and wall_seconds > walltime.wall_secs(queue_limit):
			raise InvalidJobTypeException(
				"Invalid Job Type specified: %s. Wall time %s is too large. Wall time must be shorter than %s for job type %s."
				% (job_type, walltime.strftime(wall_stripped), queue_limit, job_type)
				)

		#
		# All tests passed, add to grid.
		#

		job = Job(
			job_id = self.next_job_id,
			flags = flags,
			wall_time = wall_stripped,
			deadline = deadline_since_epoch,
			budget = budget,
			job_type = job_type,
			name = name
		)


		self.jobs[ self.next_job_id ] = job
		self.next_job_id += 1

		return job
Esempio n. 8
0
	# Print a status summary for every node in the response that is not
	# DEAD, then exit. Single-argument print(...) behaves the same under
	# the Python 2 print statement and Python 3.
	for node_id in request.response:
		node = request.response[node_id]
		if node['status'] == "DEAD":
			continue

		free_spots = int(node['cores']) - len(node['work_units'])

		print("Node: %s" % (node_id,))
		print("Status: %s" % node['status'])
		print("CPU: %s" % (float(node['cpu'])/int(node['cores'])))
		# Divide by 100.0, not 100: with an integer cost, Python 2 integer
		# division would drop the fractional part before formatting.
		print("Cost: $ %0.2f" % (node['cost'] / 100.0))
		print("Cores: %s" % node['cores'])
		print("Type: %s" % node['type'])
		print("Free Spots: %s" % free_spots)
		if free_spots == 0:
			# Node is full: report when its earliest-ending work unit is
			# due to finish (creation time plus wall time).
			end_times = [
				int(unit['created_ts']) + walltime.wall_secs(walltime.strptime(unit['wall_time']))
				for unit in node['work_units']
			]
			# With no work units (a node reporting zero cores) this stays
			# None, and time.localtime(None) yields the current time.
			earliest_end = min(end_times) if end_times else None
			print("Next free: %s" % time.asctime(time.localtime(earliest_end)))
		print("")

	# NOTE(review): exits with status 1 even after a successful listing -
	# confirm this is intended.
	sys.exit(1)


#
# Begin Client
#

# Check the files exist before starting to avoid creating