def check(self, progress):
    """
    Parameters
    ----------
    progress : float
        The current progress.

    Returns
    -------
    self.difficult : bool
        If ``True``, the branch is difficult because the progress rate is
        below the threshold.
    """
    time_now = time.time()
    one_minute = 60  # [s]
    if self.time_last is None:
        self.time_last = time_now
        self.progress_last = progress
    else:
        if time_now - self.time_last > one_minute:
            progress_rate = progress - self.progress_last  # [%/min]
            self.time_last = time_now
            self.progress_last = progress
            self.difficult = progress_rate < self.threshold
            if self.difficult:
                tools.info_print('current branch is difficult')
    return self.difficult
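# A minimal usage sketch for check() (hypothetical driver loop; assumes the
# constructor sets ``self.threshold`` in [%/min] and initializes
# ``self.time_last = None`` and ``self.difficult = False``, consistent with
# the logic above):
#
#     checker = DifficultyChecker()
#     checker.reset()
#     while partitioning:                      # hypothetical loop condition
#         progress = volume_filled_current     # hypothetical progress value
#         if checker.check(progress):
#             pass  # rate < threshold: consider offloading work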
def status_publisher_thread(self):
    """Main loop thread which publishes the status."""
    while True:
        self.publisher_looprate.update()
        #-- Main code ---------------------------------------
        self.mutex.acquire()
        queue_length = len(self.task_queue)
        worker_status = copy.deepcopy(self.worker_proc_status)
        looprates = dict(
            publisher=self.publisher_looprate.get_frequency(),
            dispatcher=self.dispatcher_looprate.get_frequency(),
            collector=self.collector_looprate.get_frequency())
        self.mutex.release()
        self.status_publisher.update(worker_status, queue_length, looprates)
        #----------------------------------------------------
        self.mutex.acquire()
        stop = self.stop_threads
        if stop:
            self.status_publisher.update(self.worker_proc_status,
                                         len(self.task_queue),
                                         looprates, force=True)
            self.status_publisher.reset_estimators()
        self.mutex.release()
        if stop:
            tools.info_print('status publisher stopping')
            return
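# Note on the locking pattern above: shared state (task_queue,
# worker_proc_status, the loop-rate estimators) is snapshotted under
# self.mutex, and the steady-state status_publisher.update() call operates
# on the snapshot after the mutex is released; only the final forced publish
# during shutdown runs while the lock is held.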
def offload_child_computation(self, child, location, which_alg,
                              prioritize_self=False, force='none'):
    """
    Offload partitioning for child to another worker process.

    Parameters
    ----------
    child : Tree
        The child for which computation is to be offloaded.
    location : string
        Location of the child in the overall tree.
    which_alg : {'ecc','lcss'}
        Which algorithm is to be run for the partitioning.
    prioritize_self : bool, optional
        If ``True``, assign work to self regardless of the idle worker
        count, but still respect the recursion limit.
    force : {'none','offload','self'}, optional
        'offload' means submit the task to the queue, no matter what;
        'self' means continue to work on the task alone, no matter what;
        'none' means no such forcing.
    """
    # --- Check worker count
    with open(global_vars.IDLE_COUNT_FILE, 'rb') as f:
        try:
            idle_worker_count = pickle.load(f)
        except EOFError:
            # This may occur if the file is currently being written to by
            # the scheduler. In this case, conservatively assume that
            # there are no idle workers
            idle_worker_count = 0
    tools.info_print('idle worker count = %d' % (idle_worker_count))
    # --- Check recursion limit
    recursion_depth = (len(location) -
                       len(self.status_publisher.data['current_branch']))
    recurs_limit_reached = recursion_depth > global_vars.MAX_RECURSION_LIMIT
    if recurs_limit_reached:
        tools.error_print('recursion limit reached - submitting task'
                          ' to queue')
    # --- Check difficulty of current branch
    progress = self.status_publisher.data['volume_filled_current']
    is_difficult = self.difficulty.check(progress)
    # --- Offloading logic
    if (force != 'self' and
        (force == 'offload' or recurs_limit_reached or is_difficult or
         (idle_worker_count > 0 and not prioritize_self))):
        new_task = dict(branch_root=child, location=location,
                        action=which_alg)
        tools.info_print('sending task {}'.format(new_task))
        tools.MPI.nonblocking_send(new_task,
                                   dest=global_vars.SCHEDULER_PROC,
                                   tag=global_vars.NEW_BRANCH_TAG)
    else:
        self.status_publisher.update(algorithm=which_alg)
        self.alg_call(which_alg, child, location)
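# The offloading decision above reduces to the following predicate, shown
# here as a standalone sketch (names mirror the local variables of
# offload_child_computation()):
#
#     def should_offload(force, recurs_limit_reached, is_difficult,
#                        idle_worker_count, prioritize_self):
#         if force == 'self':
#             return False
#         return (force == 'offload' or recurs_limit_reached or
#                 is_difficult or
#                 (idle_worker_count > 0 and not prioritize_self))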
def update(self, active=None, failed=False, volume_filled_increment=None,
           simplex_count_increment=None, location=None, algorithm=None):
    """
    Update the data.

    Parameters
    ----------
    active : bool, optional
        ``True`` if the process is in the 'active' state.
    failed : bool, optional
        ``True`` if the algorithm failed (worker process shutdown).
    volume_filled_increment : float, optional
        Volume of the closed leaf.
    simplex_count_increment : int, optional
        How many additional simplices were added to the partition.
    location : str, optional
        Current location in the tree.
    algorithm : {'ecc','lcss'}, optional
        Which algorithm is to be run for the partitioning.
    """
    # Update time counters
    dt = time.time() - self.time_previous
    self.time_previous += dt
    if self.data['status'] == 'active':
        self.data['time_active_total'] += dt
        self.data['time_active_current'] += dt
    else:
        self.data['time_idle'] += dt
    if self.data['algorithm'] == 'ecc':
        self.data['time_ecc'] += dt
    else:
        self.data['time_lcss'] += dt
    # Update status
    if active is not None:
        self.data['status'] = 'active' if active else 'idle'
        tools.info_print('sending status update, status = %s' %
                         (self.data['status']))
    if failed is True:
        self.data['status'] = 'failed'
    # Update volume counters
    if volume_filled_increment is not None:
        self.data['volume_filled_total'] += volume_filled_increment
        # Progress on the current branch is a fraction of the branch root
        # simplex volume
        self.data['volume_filled_current'] += (volume_filled_increment /
                                               self.volume_current)
    # Update simplex counters
    if simplex_count_increment is not None:
        self.data['simplex_count_total'] += simplex_count_increment
        self.data['simplex_count_current'] += simplex_count_increment
    # Update location
    if location is not None:
        self.data['current_location'] = location
    # Update algorithm
    if algorithm is not None:
        self.data['algorithm'] = algorithm
    self.__write()
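# Typical calls to update(), as made elsewhere in this module (see ecc(),
# offload_child_computation() and the worker's spin() loop):
#
#     self.status_publisher.update(active=True)                # started work
#     self.status_publisher.update(location=location)          # moved in tree
#     self.status_publisher.update(simplex_count_increment=1)  # grew the tree
#     self.status_publisher.update(algorithm=which_alg)        # switched alg
#     self.status_publisher.update(failed=True)                # fatal error
#     self.status_publisher.update(active=False)               # back to idle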
def ecc(self, node, location):
    """
    Implementation of [1] Algorithm 2 lines 4-16. Pass a tree root node and
    this grows the tree until its leaves are feasible partition cells.
    **Caution**: modifies ``node`` (passed by reference).

    [1] D. Malyuta, B. Acikmese, M. Cacan, and D. S. Bayard,
    "Partition-based feasible integer solution pre-computation for hybrid
    model predictive control," in 2019 European Control Conference
    (accepted), IFAC, jun 2019.

    Parameters
    ----------
    node : Tree
        Tree root. Sufficient that it just holds node.data.vertices for the
        simplex vertices.
    location : string
        Location in tree of this node. String where '0' at index i means
        take left child, '1' at index i means take right child (at depth
        value i).
    """
    self.status_publisher.update(location=location)
    tools.info_print('ecc at location = %s' % (location))
    c_R = np.average(node.data.vertices, axis=0)  # Simplex barycenter
    if not self.oracle.P_theta(theta=c_R, check_feasibility=True):
        raise RuntimeError('STOP, Theta contains infeasible regions')
    else:
        delta_hat, vx_inputs_and_costs = self.oracle.V_R(node.data.vertices)
        if delta_hat is None:
            S_1, S_2 = tools.split_along_longest_edge(node.data.vertices)[:2]
            child_left = NodeData(vertices=S_1)
            child_right = NodeData(vertices=S_2)
            node.grow(child_left, child_right)
            self.status_publisher.update(simplex_count_increment=1)
            # Recursive call for each resulting simplex
            self.offload_child_computation(node.left, location + '0', 'ecc')
            self.offload_child_computation(node.right, location + '1', 'ecc',
                                           prioritize_self=True)
        else:
            # Assign feasible commutation to simplex
            Nvx = node.data.vertices.shape[0]
            vertex_costs = np.array([vx_inputs_and_costs[i][1]
                                     for i in range(Nvx)])
            vertex_inputs = np.array([vx_inputs_and_costs[i][0]
                                      for i in range(Nvx)])
            node.data = NodeData(vertices=node.data.vertices,
                                 commutation=delta_hat,
                                 vertex_costs=vertex_costs,
                                 vertex_inputs=vertex_inputs)
            self.offload_child_computation(node, location, 'lcss',
                                           force='self')
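# Worked example of the location encoding used above: bisecting a node at
# location '01' yields children at '010' (left half) and '011' (right half),
# so a node at '011' is reached from the tree root by taking the left child
# once, then the right child twice.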
def __publish_idle_count(self):
    """
    Communicate to worker processes how many more workers are idle than
    there are tasks in the queue. If more workers are idle than there are
    tasks in the queue, we want currently active workers to offload some of
    their work to these "slacking" workers.

    **NOT THREAD SAFE -- wrap with a mutex!**
    """
    num_tasks = len(self.task_queue)
    num_idle_workers = len(self.idle_workers)
    num_workers_with_no_work = max(num_idle_workers - num_tasks, 0)
    tools.info_print('idle worker count = %d' % (num_idle_workers))
    with open(global_vars.IDLE_COUNT_FILE, 'wb') as f:
        pickle.dump(num_workers_with_no_work, f)
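# Worked example for the published count: with 5 idle workers and 3 queued
# tasks, num_workers_with_no_work = max(5 - 3, 0) = 2, so active workers are
# encouraged to offload up to 2 branches; with 2 idle workers and 3 queued
# tasks the published count is 0 and nobody offloads on account of it.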
def work_dispatcher_thread(self):
    """Main loop thread which dispatches tasks to workers."""
    while True:
        self.dispatcher_looprate.update()
        #-- Main code ---------------------------------------
        # Dispatch work to idle workers
        worker_count_changed = False
        while True:
            # Check exit condition
            self.mutex.acquire()
            num_tasks = len(self.task_queue)
            num_idle_workers = len(self.idle_workers)
            self.mutex.release()
            if num_tasks == 0 or num_idle_workers == 0:
                break
            # Dispatch task to idle worker process
            self.mutex.acquire()
            task = self.task_queue.pop()
            idle_worker_idx = self.idle_workers.pop()
            self.mutex.release()
            tools.info_print(('dispatching task to worker (%d) (%d '
                              'tasks left), data {}' %
                              (self.get_worker_proc_num(idle_worker_idx),
                               num_tasks - 1)).format(task))
            self.mutex.acquire()
            tools.MPI.blocking_send(
                task, dest=self.get_worker_proc_num(idle_worker_idx),
                tag=global_vars.NEW_WORK_TAG)
            self.mutex.release()
            self.mutex.acquire()
            self.worker2task[idle_worker_idx] = task
            self.mutex.release()
            worker_count_changed = True
        if worker_count_changed:
            self.mutex.acquire()
            self.__publish_idle_count()
            self.mutex.release()
        #----------------------------------------------------
        self.mutex.acquire()
        stop = self.stop_threads
        self.mutex.release()
        if stop:
            tools.info_print('work dispatcher stopping')
            return
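# Note on queue semantics: the collector thread inserts new tasks at index 0
# of self.task_queue while this dispatcher pops from the end, so tasks are
# handed out oldest-first. A minimal sketch using only the list operations
# above:
#
#     queue = []
#     queue.insert(0, 'task_a')   # collector side
#     queue.insert(0, 'task_b')
#     queue.pop()                 # dispatcher side -> 'task_a' (oldest)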
def setup(self):
    # Optimization problem oracle
    suboptimality_settings = tools.MPI.broadcast(
        None, root=global_vars.SCHEDULER_PROC)
    self.oracle = example(abs_err=suboptimality_settings['abs_err'],
                          rel_err=suboptimality_settings['rel_err'])[2]
    tools.info_print('made oracle')
    # Checker of whether a branch is "difficult"
    self.difficulty = DifficultyChecker()
    # Status publisher
    self.status_publisher = WorkerStatusPublisher()
    tools.MPI.global_sync()  # wait for all workers to finish setup
    # Algorithm call selector
    def alg_call(which_alg, branch, location):
        if which_alg == 'ecc':
            return self.ecc(branch, location)
        else:
            return self.lcss(branch, location)
    self.alg_call = alg_call
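# The alg_call closure defined above is the single entry point through which
# spin() and offload_child_computation() run either algorithm, e.g.:
#
#     self.alg_call('ecc', branch, branch_location)   # as in spin()
#     self.alg_call(which_alg, child, location)       # as in offload_child_computation()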
def spin(self):
    """
    A loop which waits (by passive blocking) for the root of a new branch
    to partition to be received from the scheduler process. When received,
    the branch is partitioned. When done, the partitioned branch ("grown
    tree") is sent back to the scheduler. The scheduler is responsible for
    aborting this loop.
    """
    while True:
        # Block until new data is received from scheduler
        tools.info_print('waiting for data')
        data = tools.MPI.blocking_receive(source=global_vars.SCHEDULER_PROC,
                                          tag=global_vars.NEW_WORK_TAG)
        tools.info_print('received data {}'.format(data))
        if data['action'] == 'stop':
            # Request from scheduler to stop
            return
        else:
            tools.info_print('got branch at location = %s' %
                             (data['location']))
            self.status_publisher.update(active=True)
            # Get data about the branch to be worked on
            branch = data['branch_root']
            branch_location = data['location']
            self.status_publisher.set_new_root_simplex(branch.data.vertices,
                                                       branch_location,
                                                       data['action'])
            # Do work on this branch (i.e. partition this simplex)
            try:
                tools.info_print('calling algorithm')
                self.difficulty.reset()  # Reset difficulty checking
                self.alg_call(data['action'], branch, branch_location)
            except:
                self.status_publisher.update(failed=True)
                raise
            # Save the completed branch and notify the scheduler that it
            # is available
            with open(global_vars.DATA_DIR + '/branch_%s.pkl' %
                      (data['location']), 'wb') as f:
                pickle.dump(data, f)
            tools.info_print('completed task at location = %s, '
                             'notifying scheduler' % (data['location']))
            self.status_publisher.update(active=False)
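# Shape of the task messages handled by the loop above, as constructed in
# offload_child_computation() (plus the scheduler's stop request):
#
#     {'action': 'ecc' or 'lcss',   # which algorithm to run
#      'branch_root': <Tree>,       # root node of the branch to partition
#      'location': '010...'}        # location string of that root
#
#     {'action': 'stop'}            # request from scheduler to stop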
def work_collector_thread(self):
    """Main loop thread which collects new and finished tasks from workers.
    This thread also evaluates the stopping criterion."""
    while True:
        self.collector_looprate.update()
        #-- Main code ---------------------------------------
        # Capture finished workers that are now idle
        any_workers_became_idle = False
        for i in self.worker_idxs:
            self.mutex.acquire()
            status = self.status_msg[i].receive()
            self.mutex.release()
            if status is not None:
                tools.info_print(
                    ('got status update from worker (%d), '
                     'status {}' %
                     (self.get_worker_proc_num(i))).format(status))
                self.mutex.acquire()
                self.worker_proc_status[i] = status
                self.mutex.release()
                if status['status'] == 'idle':
                    self.mutex.acquire()
                    self.idle_workers.append(i)
                    if self.worker2task[i] is not None:
                        self.worker2task[i] = None  # reset
                    self.mutex.release()
                    any_workers_became_idle = True
        # Collect any new work from workers
        new_tasks_available = False
        for i in self.worker_idxs:
            self.mutex.acquire()
            task = self.task_msg[i].receive()
            self.mutex.release()
            if task is not None:
                tools.info_print(
                    ('received new task from worker (%d), '
                     'task {}' %
                     (self.get_worker_proc_num(i))).format(task))
                self.mutex.acquire()
                # Inserting at the front naturally "bubbles" the easier
                # tasks to the top, while the difficult tasks gather at
                # the beginning of self.task_queue
                self.task_queue.insert(0, task)
                self.mutex.release()
                new_tasks_available = True
        if any_workers_became_idle or new_tasks_available:
            self.mutex.acquire()
            self.__publish_idle_count()
            self.mutex.release()
        #----------------------------------------------------
        self.mutex.acquire()
        self.stop_threads = (len(self.idle_workers) == self.N_workers and
                             len(self.task_queue) == 0)
        stop = self.stop_threads
        self.mutex.release()
        if stop:
            tools.info_print('work collector is stopping')
            return
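# The stopping criterion above fires only when every worker is idle AND the
# task queue is empty; either condition alone is not enough. For example,
# with N_workers = 4, len(idle_workers) == 4 and len(task_queue) == 1 the
# threads keep running, since the dispatcher still has a task to hand out.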
def spin(self):
    """
    Manages worker processes until the partitioning process is finished,
    then shuts the processes down and exits.
    """
    tools.info_print('creating the main loop threads')
    publisher = threading.Thread(target=self.status_publisher_thread)
    dispatcher = threading.Thread(target=self.work_dispatcher_thread)
    collector = threading.Thread(target=self.work_collector_thread)
    tools.info_print('starting the main loop threads...')
    collector.start()
    dispatcher.start()
    publisher.start()
    tools.info_print('waiting for the main loop threads to finish...')
    collector.join()
    dispatcher.join()
    publisher.join()
    tools.info_print('all main loop threads finished')
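# Shutdown note: none of these threads is signaled explicitly. The work
# collector sets self.stop_threads once all workers are idle and the task
# queue is empty, the dispatcher and publisher threads poll that flag under
# the mutex and return, and the join() calls above then complete.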