Python NodeMonitor Examples

Programming Language: Python

Namespace/Package Name: weasel.worker.node_monitor

Class/Type: NodeMonitor

Examples at hotexamples.com: 4

Python NodeMonitor - 4 examples found. These are the top rated real world Python examples of weasel.worker.node_monitor.NodeMonitor extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show / Hide

NodeMonitor(1)

add_task_to_monitor(1)

get_data(1)

get_median_utilization(1)

get_task_data(1)

get_utilization_by_histogram(1)

join(1)

reset_known_points(1)

shutdown(1)

start(1)

task_finished(1)

task_started(1)

Example #1

Show file

    def __init__(self):
        """Create the scheduler connection, the node monitor and all bookkeeping state.

        Only constructs the helper threads; no ``start()`` is called here.
        """
        self.identity = 'sched-' + socket.gethostbyname(socket.gethostname())
        self.sched_client_thread = ZmqConnectionThread(
            self.identity,
            zmq.DEALER,
            config.SCHEDULER + ":" + str(config.ZMQ_SCHEDULER_PORT),
            self.callback)
        self.monitor_thread = NodeMonitor()
        self.running = True
        logfile = config.LOGDIR + "/local_scheduler.log"
        self.logger = WeaselLogger('local_scheduler', logfile)
        self.capacity = self.monitor_thread.capacity
        # Controls how many tasks may run in parallel
        # (presumably keyed by queue id -- confirm against the users of this dict).
        self.max_tasks_to_run = {}
        # The starting number of tasks is defined based on the slot size.
        self.ntasks_to_ask = 1
        self.task_id = 1
        self.time_asked_first = time.time()
        # Keeps track of the number of running tasks.
        self.running_task = 0
        self.nran_tasks = []
        # The former placeholder value -1 was overwritten before any read,
        # so the attribute is initialised once, directly with the current time.
        self.time_from_last_ask = time.time()
        self.queues_asked_for = []
        self.current_ntasks = 1
        self.has_new_task = False
        self.is_profiling = False
        self.first_task = False
        self.task_data = {}
        self.t_avg = {}
        self.task_data_lock = threading.Lock()
        self.running_task_lock = threading.Lock()
        self.average_utilization = {'cpu': 0.0, 'memory': 0.0, 'network': 0.0}
        self.average_task_exec_time = 0.0
        self.sleep_time = config.WAITTIME
        self.past_speed_changes = []
        # 'id': id, 'tpool': threadPool, 'rvector': resource_characteristics
        self.queue_data = {}
        self.task_time = 1
        self.queue_data_lock = threading.Lock()
        self.has_new_queue = False
        self.new_queues = []
        self.message_to_send = None
        self.logger.info("NodeScheduler started...")
        self.nrunning_past_period = []

Example #2

Show file

File: node_scheduler.py Project: ovedanner/weasel

	def __init__(self):
		"""Build the worker-side scheduler state.

		Reads the node address from ``ifconfig``, creates the single task queue
		with its thread pool (the pool is started here), and constructs the
		monitor and scheduler-connection threads (not started here).
		"""
		interface = config.INTERFACE
		# Raw string: the backslash in "inet\ addr" is meant for grep, not for Python.
		pipe = os.popen('ifconfig ' + str(interface) + r' | grep "inet\ addr" | cut -d: -f2 | cut -d" " -f1')
		try:
			self.identity = pipe.read().strip()
		finally:
			# Close the pipe so the child process is reaped.
			pipe.close()
		self.queue_data_lock = threading.Lock()

		# Queue id. Despite the attribute name this is the SHA-1 of a fixed
		# path, so it has the same value on every run.
		self.random_hash = hashlib.sha1(b'/tmp/Weasel/bin/local_resourcemanager').hexdigest()
		self.task_data = {}
		self.queue_data = {}
		self.max_tasks_to_run_for_queue = {}

		# Will hold the timestamp when we first received a task.
		self.time_start_running = -1

		# Initially we run two tasks per CPU core. This number might
		# be changed by the monitor thread based on resource contention.
		if config.ADAPT_TASKS:
			self.max_tasks_to_run = multiprocessing.cpu_count() * 2
		else:
			self.max_tasks_to_run = config.NR_COLOCATED_TASKS

		# Create the queue for the tasks.
		self.queue_data[self.random_hash] = {
			'qid': self.random_hash,
			'asked': 0,
			'recv': 0,
			'tpool': ThreadPool(1, self.task_data),
		}
		tpool = self.queue_data[self.random_hash]['tpool']
		self.max_tasks_to_run_for_queue[self.random_hash] = self.max_tasks_to_run
		tpool.set_size(self.max_tasks_to_run)
		tpool.start()

		# Create the monitoring thread.
		self.monitor_thread = NodeMonitor(parent=self)

		# Create the thread that performs communication with
		# the scheduler.
		self.sched_client_thread = ZmqConnectionThread(
			self.identity,
			zmq.DEALER,
			config.SCHEDULER + ":" + str(config.ZMQ_SCHEDULER_PORT),
			self.callback)
		self.running = True
		logfile = config.LOGDIR + "/local_scheduler.log"
		self.logger = WeaselLogger('local_scheduler', logfile)
		self.ntasks_to_ask = 1
		self.task_id = 1
		self.time_asked_first = time.time()
		self.running_task = 0
		self.nr_received_tasks = 0
		self.nran_tasks = []
		self.queues_asked_for = []
		self.current_ntasks = 1
		self.has_new_task = False
		self.first_task = False
		self.running_task_lock = threading.Lock()

		# Will hold a map of nr_colocated_tasks => [runtimes]
		self.task_runtimes = {}
		self.task_runtime_lock = threading.Lock()
		self.sleep_time = config.WAITTIME
		self.task_time = 1
		self.has_new_queue = False
		self.new_queues = []
		self.logger.info("NodeScheduler started...")
		self.nrunning_past_period = []

		# Will hold the last started executable.
		self.last_started = None

Example #3

Show file

File: node_scheduler.py Project: ovedanner/weasel

class NodeScheduler(object):
	def __init__(self):
		"""Build the worker-side scheduler state.

		Reads the node address from ``ifconfig``, creates the single task queue
		with its thread pool (the pool is started here), and constructs the
		monitor and scheduler-connection threads (not started here).
		"""
		interface = config.INTERFACE
		# Raw string: the backslash in "inet\ addr" is meant for grep, not for Python.
		pipe = os.popen('ifconfig ' + str(interface) + r' | grep "inet\ addr" | cut -d: -f2 | cut -d" " -f1')
		try:
			self.identity = pipe.read().strip()
		finally:
			# Close the pipe so the child process is reaped.
			pipe.close()
		self.queue_data_lock = threading.Lock()

		# Queue id. Despite the attribute name this is the SHA-1 of a fixed
		# path, so it has the same value on every run.
		self.random_hash = hashlib.sha1(b'/tmp/Weasel/bin/local_resourcemanager').hexdigest()
		self.task_data = {}
		self.queue_data = {}
		self.max_tasks_to_run_for_queue = {}

		# Will hold the timestamp when we first received a task.
		self.time_start_running = -1

		# Initially we run two tasks per CPU core. This number might
		# be changed by the monitor thread based on resource contention.
		if config.ADAPT_TASKS:
			self.max_tasks_to_run = multiprocessing.cpu_count() * 2
		else:
			self.max_tasks_to_run = config.NR_COLOCATED_TASKS

		# Create the queue for the tasks.
		self.queue_data[self.random_hash] = {
			'qid': self.random_hash,
			'asked': 0,
			'recv': 0,
			'tpool': ThreadPool(1, self.task_data),
		}
		tpool = self.queue_data[self.random_hash]['tpool']
		self.max_tasks_to_run_for_queue[self.random_hash] = self.max_tasks_to_run
		tpool.set_size(self.max_tasks_to_run)
		tpool.start()

		# Create the monitoring thread.
		self.monitor_thread = NodeMonitor(parent=self)

		# Create the thread that performs communication with
		# the scheduler.
		self.sched_client_thread = ZmqConnectionThread(
			self.identity,
			zmq.DEALER,
			config.SCHEDULER + ":" + str(config.ZMQ_SCHEDULER_PORT),
			self.callback)
		self.running = True
		logfile = config.LOGDIR + "/local_scheduler.log"
		self.logger = WeaselLogger('local_scheduler', logfile)
		self.ntasks_to_ask = 1
		self.task_id = 1
		self.time_asked_first = time.time()
		self.running_task = 0
		self.nr_received_tasks = 0
		self.nran_tasks = []
		self.queues_asked_for = []
		self.current_ntasks = 1
		self.has_new_task = False
		self.first_task = False
		self.running_task_lock = threading.Lock()

		# Will hold a map of nr_colocated_tasks => [runtimes]
		self.task_runtimes = {}
		self.task_runtime_lock = threading.Lock()
		self.sleep_time = config.WAITTIME
		self.task_time = 1
		self.has_new_queue = False
		self.new_queues = []
		self.logger.info("NodeScheduler started...")
		self.nrunning_past_period = []

		# Will hold the last started executable.
		self.last_started = None

	def run_task(self, arg):
		"""Run one task as a shell command and record how long it took.

		:param arg: task description; the keys 'id', 'exec', 'params' and 'qid' are read.
		"""
		command_id = arg['id']
		command = arg['exec'] + ' ' + arg['params']
		qid = arg['qid']
		myid = threading.current_thread().ident

		# Tell the monitor thread we have started a task.
		self.monitor_thread.task_started(myid)

		with self.running_task_lock:
			# Increment the number of running tasks.
			self.running_task += 1
			nr_colocated = self.running_task

			# The first time we run a task of a different type, we reset
			# the history on the monitor thread.
			if self.last_started is None:
				self.last_started = arg['exec']
			if self.last_started != arg['exec']:
				print('Started new task type: ' + str(arg['exec']))
				sys.stdout.flush()
				self.monitor_thread.reset_known_points()
				self.last_started = arg['exec']

		start_time = time.time()
		proc = psutil.Popen(command, shell=True,
							stdout=PIPE, stderr=PIPE)
		slot = self.task_data[myid]
		slot['lock'].acquire()
		try:
			slot['proc'] = proc
			slot['ctask'] = arg
		finally:
			slot['lock'].release()
		out, err = proc.communicate()
		return_code = proc.returncode
		if return_code != 0:
			print('Error when returning: ' + str(return_code))
			sys.stdout.flush()
		end_time = time.time()
		running_time = end_time - start_time

		# Record task running times, grouped by the number of tasks that
		# were running when this one started.
		with self.task_runtime_lock:
			if nr_colocated not in self.task_runtimes:
				self.task_runtimes[nr_colocated] = []
			self.task_runtimes[nr_colocated].append(running_time)
			print("Task %s ran in %s seconds (%s)" % (str(command_id), str(running_time), str(arg['exec'])))
			sys.stdout.flush()

		# Tell the monitor thread we have finished a task.
		self.monitor_thread.task_finished(myid)

		# NOTE(review): the second sample field is 100 * t / t, i.e. always
		# about 100 -- it looks like a placeholder. A zero elapsed time (coarse
		# clock) used to raise ZeroDivisionError with the lock held.
		if running_time:
			share = 100 * running_time / running_time
		else:
			share = 100.0
		slot = self.task_data[myid]
		slot['lock'].acquire()
		try:
			slot['ctask'] = None
			if slot['task'].get(qid) is None:
				slot['task'][qid] = []
				self.external_change = True
			slot['task'][qid].append([running_time, share])
		finally:
			slot['lock'].release()

		with self.running_task_lock:
			self.running_task -= 1
			self.nran_tasks.append(command_id)

	def get_total_queue_size(self):
		queue_size = 0
		self.queue_data_lock.acquire()
		for qid in self.queue_data:
			queue_size = queue_size + self.queue_data[qid]['tpool'].tasks.qsize()
		self.queue_data_lock.release()
		return queue_size

	def get_tasks_to_ask(self):
		"""
		Returns the number of tasks to ask from the scheduler. Tries to keep the queue size at
		least as long as the maximum allowed number of tasks at the moment.
		:return:
		"""
		tasks_to_ask = {}
		self.queues_asked_for = []
		queue_size = self.get_total_queue_size()
		self.queue_data_lock.acquire()
		for qid in self.queue_data:
			tasks_to_ask[qid] = 0
			self.queue_data[qid]['asked'] = 0
			self.queue_data[qid]['recv'] = 0
			qsize = self.queue_data[qid]['tpool'].tasks.qsize()
			if qsize > 2 * self.max_tasks_to_run_for_queue[qid] and self.max_tasks_to_run_for_queue[qid] != -1:
				continue
			if qsize == 0:
				tasks_to_ask[qid] = self.max_tasks_to_run_for_queue[qid]
			else:
				if qsize > self.max_tasks_to_run_for_queue[qid] and self.max_tasks_to_run_for_queue[qid] != -1:
					continue
				elif qsize < self.max_tasks_to_run_for_queue[qid]:
					tasks_to_ask[qid] = self.max_tasks_to_run_for_queue[qid] - qsize
			self.queues_asked_for.append(qid)
			self.queue_data[qid]['asked'] = tasks_to_ask[qid]
		self.queue_data_lock.release()
		return tasks_to_ask, queue_size

	def wait_and_ask(self):
		"""Every 0.2 s, report the finished task ids and request new tasks, until self.running is cleared."""
		while self.running:
			# Poll period of 0.2 seconds.
			time.sleep(0.2)

			# Snapshot and reset the list of finished tasks under the lock.
			self.running_task_lock.acquire()
			self.nrunning_past_period.append(self.running_task)
			report = {'ran': self.nran_tasks[:]}
			self.nran_tasks = []
			self.running_task_lock.release()

			request, queued = self.get_tasks_to_ask()
			report['qsize'] = queued * self.task_time
			pickled_data = pickle.dumps(report)
			if not request:
				continue
			message = [self.identity, PROTOCOL_HEADERS['WORKER'], 'task', pickle.dumps(request), pickled_data]
			self.sched_client_thread.put_request_in_queue(message)

	def process_task(self, task):
		"""Report whether ``task`` is of a type not seen before; this variant always says no.

		The task string used to be parsed into an unused local, whose only
		observable effect was an IndexError on an empty or blank command.
		The dead parsing is removed, so malformed input no longer raises.

		:param task: command string of the task (not inspected).
		:return: always False.
		"""
		return False

	def add_task_to_queues(self, tasks):
		for task in tasks['tasks']:
			self.running_task_lock.acquire()
			self.nr_received_tasks += 1
			self.running_task_lock.release()
			new_task = self.process_task(task['exec'])
			task_hash = hashlib.sha1(task['exec'].encode()).hexdigest()
			self.has_new_task |= new_task
			task['qid'] = task_hash
			self.queue_data[self.random_hash]['tpool'].add_task(self.run_task, task)

	def get_latest_task_type(self):
		"""
		Return the latest started task type.
		:return:
		"""
		self.running_task_lock.acquire()
		latest = self.last_started
		self.running_task_lock.release()
		return latest

	def running_identical_tasks(self):
		"""
		Returns whether or not the worker is currently only running tasks of the same type.
		:return:
		"""
		current = None
		task_threads = self.task_data.keys()
		try:
			for task_thread in task_threads:
				if (task_thread not in self.task_data) or ('lock' not in self.task_data[task_thread]):
					continue
				self.task_data[task_thread]['lock'].acquire()
				if 'ctask' in self.task_data[task_thread] and self.task_data[task_thread]['ctask'] is not None:
					if current is None:
						current = self.task_data[task_thread]['ctask']['exec']
					elif current != self.task_data[task_thread]['ctask']['exec']:
						self.task_data[task_thread]['lock'].release()
						return False
				self.task_data[task_thread]['lock'].release()
			return True
		except Exception, e:
			print('Got exception while trying to determine identical tasks')
			print(e)
			sys.stdout.flush()

Example #4

Show file

class NodeScheduler(object):

    def __init__(self):
        """Create the scheduler connection, the node monitor and all bookkeeping state.

        Only constructs the helper threads; no ``start()`` is called here.
        """
        self.identity = 'sched-' + socket.gethostbyname(socket.gethostname())
        self.sched_client_thread = ZmqConnectionThread(
            self.identity,
            zmq.DEALER,
            config.SCHEDULER + ":" + str(config.ZMQ_SCHEDULER_PORT),
            self.callback)
        self.monitor_thread = NodeMonitor()
        self.running = True
        logfile = config.LOGDIR + "/local_scheduler.log"
        self.logger = WeaselLogger('local_scheduler', logfile)
        self.capacity = self.monitor_thread.capacity
        # Controls how many tasks may run in parallel: queue id -> limit,
        # where -1 marks a queue as dead/empty.
        self.max_tasks_to_run = {}
        # The starting number of tasks is defined based on the slot size.
        self.ntasks_to_ask = 1
        self.task_id = 1
        self.time_asked_first = time.time()
        # Keeps track of the number of running tasks.
        self.running_task = 0
        self.nran_tasks = []
        # The former placeholder value -1 was overwritten before any read,
        # so the attribute is initialised once, directly with the current time.
        self.time_from_last_ask = time.time()
        self.queues_asked_for = []
        self.current_ntasks = 1
        self.has_new_task = False
        self.is_profiling = False
        self.first_task = False
        self.task_data = {}
        self.t_avg = {}
        self.task_data_lock = threading.Lock()
        self.running_task_lock = threading.Lock()
        self.average_utilization = {'cpu': 0.0, 'memory': 0.0, 'network': 0.0}
        self.average_task_exec_time = 0.0
        self.sleep_time = config.WAITTIME
        self.past_speed_changes = []
        # 'id': id, 'tpool': threadPool, 'rvector': resource_characteristics
        self.queue_data = {}
        self.task_time = 1
        self.queue_data_lock = threading.Lock()
        self.has_new_queue = False
        self.new_queues = []
        self.message_to_send = None
        self.logger.info("NodeScheduler started...")
        self.nrunning_past_period = []

    def profile(self, nrunning):
        """Placeholder profiling hook; does nothing and returns None."""
        return None

    def change_work_queue(self, nrunning, nrunning_past, avg_time, avg_cpu):
        """Placeholder hook for adapting the work queues; does nothing and returns None."""
        return None

    def run_task(self, arg):
        """Run one task as a shell command and store a runtime sample for its queue.

        :param arg: task description; the keys 'id', 'exec', 'params' and 'qid' are read.
        """
        command_id = arg['id']
        command = arg['exec'] + ' ' + arg['params']
        qid = arg['qid']
        myid = threading.current_thread().ident

        # This also marks that at least one task runs on the node.
        with self.running_task_lock:
            self.running_task = self.running_task + 1

        start_time = time.time()
        proc = psutil.Popen(command, shell=True,
                            stdout=PIPE, stderr=PIPE)
        slot = self.task_data[myid]
        slot['lock'].acquire()
        try:
            slot['proc'] = proc
            slot['ctask'] = arg
        finally:
            slot['lock'].release()
        out, err = proc.communicate()
        end_time = time.time()
        elapsed = end_time - start_time

        # NOTE(review): the second sample field is 100 * t / t, i.e. always
        # about 100 -- it looks like a placeholder. A zero elapsed time (coarse
        # clock) used to raise ZeroDivisionError with the lock held.
        if elapsed:
            share = 100 * elapsed / elapsed
        else:
            share = 100.0
        slot = self.task_data[myid]
        slot['lock'].acquire()
        try:
            if slot['task'].get(qid) is None:
                slot['task'][qid] = []
                self.external_change = True
            slot['task'][qid].append([elapsed, share])
        finally:
            slot['lock'].release()

        with self.running_task_lock:
            self.running_task = self.running_task - 1
            self.nran_tasks.append(command_id)

    def get_total_queue_size(self):
        queue_size = 0
        self.queue_data_lock.acquire()
        for qid in self.queue_data:
	    #print "Queue ", qid, " size ", self.queue_data[qid]['tpool'].tasks.qsize()
            queue_size = queue_size + \
                self.queue_data[qid]['tpool'].tasks.qsize()
        self.queue_data_lock.release()
        return queue_size

    def get_tasks_to_ask(self, nrunning):
        tasks_to_ask = {}
        self.queues_asked_for = []
        queue_size = self.get_total_queue_size()
        if queue_size + nrunning == 0 and not self.is_profiling:
            return (tasks_to_ask, queue_size)
        self.queue_data_lock.acquire()
        for qid in self.queue_data:
	    tasks_to_ask[qid] = 0
            self.queue_data[qid]['asked'] = 0
            self.queue_data[qid]['recv'] = 0
            qsize = self.queue_data[qid]['tpool'].tasks.qsize()
            if qsize > 2 * self.max_tasks_to_run[qid] and self.max_tasks_to_run[qid] != -1:
                continue
            if qsize == 0:
                tasks_to_ask[qid] = max(10, 2 * self.max_tasks_to_run[qid])
            else:
                if qsize > 2 * self.max_tasks_to_run[qid] and self.max_tasks_to_run[qid] != -1:
                    continue
                elif qsize < 2 * self.max_tasks_to_run[qid]:
                    tasks_to_ask[qid] = 2
            self.queues_asked_for.append(qid)
            self.queue_data[qid]['asked'] = tasks_to_ask[qid]
        self.queue_data_lock.release()
        return (tasks_to_ask, queue_size)

    def wait_and_ask(self):
        """Every 0.2 s, report finished tasks and request new ones, until self.running is cleared.

        Queues that were asked for but are still empty more than
        2 * config.WAITTIME after self.time_from_last_ask are marked dead
        (limit -1).
        """
        while self.running:
            # Poll period of 0.2 seconds.
            time.sleep(0.2)
            # How much time has passed since self.time_from_last_ask.
            ctime = time.time()
            if ctime - self.time_from_last_ask > 2 * config.WAITTIME:
                # Here we mark the queues as dead.
                for qid in self.queues_asked_for:
                    if self.queue_data[qid]['tpool'].tasks.qsize() == 0:
                        # Single-argument print: same output on Python 2, valid on Python 3.
                        print("@@@@@@@@@@@@@@@@@@@  I mark queue  %s  as dead because I don't have tasks for it" % (qid,))
                        self.max_tasks_to_run[qid] = -1
            with self.running_task_lock:
                nrunning = self.running_task
                self.nrunning_past_period.append(nrunning)
                task_data_to_send = {'ran': self.nran_tasks[:]}
                self.nran_tasks = []
            if self.is_profiling:
                self.profile(nrunning)
            (tasks_to_ask, queue_size) = self.get_tasks_to_ask(nrunning)
            task_data_to_send['qsize'] = queue_size * self.task_time
            pickled_data = pickle.dumps(task_data_to_send)
            if self.is_profiling and config.POLICY == 'dynamic3':
                # Fixed: this test used the undefined name `qsize`, which
                # raised NameError; the total queue size is `queue_size`.
                if queue_size + nrunning == 0 and not self.first_task:
                    self.sched_client_thread.put_request_in_queue(
                        [self.identity, PROTOCOL_HEADERS['WORKER'], 'task_empty',
                         str(2 * self.current_ntasks), pickled_data])
                    self.first_task = True
                # While profiling under this policy no regular request is
                # sent and message_to_send is left untouched.
                continue
            elif len(tasks_to_ask) > 0:
                self.sched_client_thread.put_request_in_queue(
                    [self.identity, PROTOCOL_HEADERS['WORKER'], 'task',
                     pickle.dumps(tasks_to_ask), pickled_data])
            self.message_to_send = pickled_data

    def process_task(self, task):
        tmp = task.split(';')
        task_name = tmp[-1].split()[0].split('/')[-1]
        new_task = False
        # I have new tasks!!
        if task_name not in self.monitor_thread.tasks_to_monitor:
            new_task = True
            self.is_profiling = True
            self.monitor_thread.add_task_to_monitor(task_name)
        return new_task

    def add_task_to_queues(self, tasks):
        if len(tasks['queues']) > 0:
	    print tasks['queues']
            self.queue_data_lock.acquire()
            for queue in tasks['queues']:
                if not self.queue_data.get(queue):
                    self.new_queues.append(queue)
                    self.max_tasks_to_run[queue] = 0
                    self.queue_data[queue] = {
                        'qid': queue,
			'elapsed':0,
			'tavg': 0,
			'thoughput':0,
                        'asked': 0,
                        'recv': 0,
                        'type': "",
			'tpool': ThreadPool(
                            0,
                            self.task_data),
                        'resource': ""}  # this contains the resource vector of a task
                    self.has_new_queue = True
                if tasks['queues'][queue] == -1:
                    print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Queue ", queue, " is empty!"
                    self.max_tasks_to_run[queue] = -1
            self.queue_data_lock.release()
        self.queue_data_lock.acquire()
        for task in tasks['tasks']:
	    #print "Adding tasks to queues: ", task
            ''' here: if the task does not exist in my history:
                shrink the pool at 1 task and enter the profiling mode
                profiling mode = record resource util for the first 10 tasks '''
            qid = hashlib.sha1(task['exec'].encode()).hexdigest()
            self.has_new_task = self.has_new_task | self.process_task(
                task['exec'])
	    task['qid'] = qid
            self.add_task_to_queue(self.queue_data[qid]['tpool'], task)
            self.queue_data[qid]['recv'] = self.queue_data[qid]['recv'] + 1
        self.queue_data_lock.release()

    def callback(self, frames):
        ''' this is a message from the server '''
	command = frames[2]
        data = None
        if len(frames) > 3:
            data = frames[3]
        if command == 'shutdown':
            self.shutdown(None)
        elif command == 'task':
            self.time_from_last_ask = time.time()
            tasks = pickle.loads(data)
            self.add_task_to_queues(tasks)
	elif command == 'empty':
            for qid in self.queue_data:
                self.empty_queue(self.queue_data[qid]['tpool'])
	else:
	    print "No callback for this message!"


    def add_task_to_queue(self, queue, task):
        queue.add_task(self.run_task, task)

    def empty_queue(self, queue):
        """Drain ``queue`` without processing its items, marking each removed item as done."""
        while True:
            if queue.empty():
                break
            try:
                queue.get(False)
            except Empty:
                # Another consumer took the item first; re-check emptiness.
                continue
            queue.task_done()

    def shutdown(self, data):
        """Request termination by clearing the ``running`` flag; ``data`` is ignored."""
        self.running = False

    def log_node_utilization(self):
        median = self.monitor_thread.get_median_utilization()
        histo_util = self.monitor_thread.get_utilization_by_histogram()
        data = self.monitor_thread.get_data()
        cpu_sum = median['cpu'] + median['cpu_idle'] + \
            median['cpu_sys'] + median['cpu_io']
        if cpu_sum == 0:
            real_value = 0
        else:
            real_value = 100 * (median['cpu']) / cpu_sum
        self.logger.info(
            "Median utilization/2secs: %s %s %s %s" %
            (median['cpu'],
             median['memory'],
                median['network'],
                100 *
                median['cpu_io'] /
                cpu_sum))
        self.logger.info(
            "Histo utilization: %s %s %s" %
            (histo_util['cpu'],
             histo_util['memory'],
             histo_util['network']))

    def is_ok_to_ask(self):
        """Tell whether the scheduler may be asked for tasks; currently always True."""
        return True

    def empty_task_data(self):
        for tid in self.task_data:
            self.task_data[tid]['lock'].acquire()
            self.task_data[tid]['task'] = {}
            self.task_data[tid]['lock'].release()

    def check_empty_queues(self):
        queues_empty = True
        self.queue_data_lock.acquire()
        for qid in self.queue_data:
            queues_empty = queues_empty & self.queue_data[
                qid]['tpool'].tasks.empty()
        self.queue_data_lock.release()
        return queues_empty


    def compute_stats(self, task_data, avg_time, avg_cpu):
        total_len = 0
        try:
            for tid in task_data:
                task_data[tid]['lock'].acquire()
                for task in task_data[tid]['task']:
                    if not avg_time.get(task):
                        avg_time[task] = 0
                        avg_cpu[task] = 0
                    for data in task_data[tid]['task'][task]:
                        avg_time[task] = avg_time[task] + data[0]
                        avg_cpu[task] = avg_cpu[task] + data[1]
                    total_len = total_len + \
                            len(task_data[tid]['task'][task])
                    if not self.is_profiling:
                        # leave the last value
                        while len(task_data[tid]['task'][task]) > 0:
                            task_data[tid]['task'][task].pop(0)
                task_data[tid]['lock'].release()
            for task in avg_time:
                avg_time[task] = avg_time[task] / total_len
                avg_cpu[task] = avg_cpu[task] / total_len
	    self.empty_task_data()
        except:
            traceback.print_exc()

    def run(self):
        """Start the helper threads and run the periodic bookkeeping loop until shutdown.

        Each iteration logs utilization, sends 'task_empty' when the node is
        idle, aggregates the task statistics, drops the data of deleted worker
        threads, updates the per-queue timing figures, and sleeps for
        self.sleep_time. After the loop all threads and pools are drained and joined.
        """
        self.sched_client_thread.start()
        self.monitor_thread.start()
        finishing_tasks_thread = Thread(target=self.wait_and_ask)
        finishing_tasks_thread.start()
        # Threads running from here on:
        # - the monitoring thread
        # - the communication thread
        # - the thread that waits to ask for more tasks
        while self.running:
            self.log_node_utilization()
            task_data = self.monitor_thread.get_task_data()
            total_util = {'cpu': 0, 'memory': 0}
            for task in task_data:
                for data in task_data[task]:
                    total_util['cpu'] = total_util[
                        'cpu'] + data[0][0] / len(task_data[task])
                    total_util['memory'] = total_util[
                        'memory'] + data[1] / len(task_data[task])
            self.logger.info(
                "Total utilization of the other processes is: %s %s" %
                (total_util['cpu'], total_util['memory']))
            # Snapshot the running-task counters.
            with self.running_task_lock:
                nrunning = self.running_task
                nrunning_past = self.nrunning_past_period[:]
                self.nrunning_past_period = []
            for task in self.max_tasks_to_run:
                self.logger.info(
                    "%s Running tasks: %s" %
                    (task, self.max_tasks_to_run[task]))
            if self.check_empty_queues() and nrunning == 0:
                if self.is_ok_to_ask():
                    # All tasks are finished: ask the resource manager for
                    # a random task.
                    print("Sending task_empty message!")
                    self.sched_client_thread.put_request_in_queue(
                        [self.identity, PROTOCOL_HEADERS['WORKER'], 'task_empty', str(2 * self.capacity['cores'])])
            avg_time = {}
            avg_cpu = {}
            task_data = self.task_data
            self.compute_stats(task_data, avg_time, avg_cpu)
            max_task_time = 0
            # Context manager: the lock is released even if a lookup below raises.
            with self.queue_data_lock:
                # Now is the time to remove the data from the dead threads.
                for qid in self.queue_data:
                    pool = self.queue_data[qid]['tpool']
                    pool.dict_lock.acquire()
                    try:
                        for tid in pool.deleted_workers:
                            if self.task_data.get(tid):
                                del self.task_data[tid]
                        pool.deleted_workers = []
                    finally:
                        pool.dict_lock.release()
                for task in avg_time:
                    if config.POLICY != 'static':
                        if avg_time[task] == 0:
                            # No finished task in this period: accumulate waiting time.
                            self.queue_data[task]['elapsed'] = self.queue_data[task]['elapsed'] + config.WAITTIME
                        else:
                            self.queue_data[task]['elapsed'] = 0
                            self.queue_data[task]['tavg'] = (self.queue_data[task]['tavg'] + avg_time[task]) / 2
                    if avg_time[task] > max_task_time:
                        max_task_time = avg_time[task]
                    # Single-argument print: same output on Python 2, valid on Python 3.
                    print("%s Avg_time:  %s" % (task, avg_time[task]))
                    self.logger.info(
                        "%s Avg_time: %s" %
                        (task, avg_time[task]))
                    # A zero average is an expected case (handled above);
                    # computing a speed from it used to raise
                    # ZeroDivisionError while holding queue_data_lock.
                    if avg_time[task] != 0:
                        speed = self.max_tasks_to_run[task] / avg_time[task]
                        self.logger.info(
                            "%s Task speed: %s" %
                            (task, speed))
                    else:
                        speed = None
                    self.logger.info(
                        "%s Task util: %s" %
                        (task, avg_cpu[task]))
                    if speed is not None:
                        self.past_speed_changes.append(speed)
                        # Keep only the four most recent speeds.
                        if len(self.past_speed_changes) > 4:
                            self.past_speed_changes.pop(0)

                    if config.POLICY != 'static' and self.queue_data[task]['type'] == "":
                        # NOTE(review): the second test can overwrite 'long'
                        # with 'short' when 'elapsed' exceeds T_LONG but
                        # 'tavg' does not -- confirm this is intended.
                        if self.queue_data[task]['elapsed'] > config.T_LONG or \
                                self.queue_data[task]['tavg'] > config.T_LONG:
                            self.queue_data[task]['type'] = 'long'
                        if self.queue_data[task]['tavg'] < config.T_LONG:
                            self.queue_data[task]['type'] = 'short'
            self.task_time = max_task_time
            self.change_work_queue(nrunning, nrunning_past, avg_time, avg_cpu)
            self.logger.info("Ran %s tasks" % self.task_id)
            self.logger.debug("Sleeping: %s" % self.sleep_time)
            self.monitor_thread.max_data_buffer_len = int(
                self.sleep_time /
                config.MONITOR_PERIOD)
            time.sleep(self.sleep_time)
        finishing_tasks_thread.join()
        for qid in self.queue_data:
            self.queue_data[qid]['tpool'].wait_completion()
        self.sched_client_thread.stop()
        self.monitor_thread.shutdown()
        self.monitor_thread.join()
        self.sched_client_thread.join()