def getNodes(api, num, **constraints):
    # Now do the backtracking search for a suitable solution
    # First with existing slice nodes
    reqs = []
    nodes = []

    import node as Node

    for i in xrange(num):
        node = Node.Node(api)
        node.min_num_external_interface = 1
        nodes.append(node)

    node = nodes[0]
    candidates = filterBlacklist(node.find_candidates())
    reqs = [candidates] * num

    def pickbest(fullset, nreq, node=nodes[0]):
        if len(fullset) > nreq:
            fullset = zip(node.rate_nodes(fullset), fullset)
            fullset.sort(reverse=True)
            del fullset[nreq:]
            return set(map(operator.itemgetter(1), fullset))
        else:
            return fullset

    solution = resourcealloc.alloc(reqs, sample=pickbest)

    # Do assign nodes
    runner = ParallelRun(maxthreads=4)
    for node, node_id in zip(nodes, solution):
        runner.put(node.assign_node_id, node_id)
    runner.join()

    return nodes
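
# Illustrative usage sketch (not part of the original module). It assumes a
# ready, PLCAPI-like `api` object obtained elsewhere; getNodes only threads
# it through to Node(). Extra constraint keywords are accepted but, as
# written above, not consulted; `hostname` is assumed populated once a node
# id has been assigned.
def _example_get_nodes(api):
    # Request five nodes, each required to have an external interface
    nodes = getNodes(api, 5)
    for n in nodes:
        print n.hostname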
def do_wait_nodes(self):
    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node):
            # Just inject configuration stuff
            node.home_path = "nepi-node-%s" % (guid,)
            node.ident_path = self.sliceSSHKey
            node.slicename = self.slicename

            # Show the magic
            self._logger.info("PlanetLab Node %s configured at %s",
                guid, node.hostname)

    try:
        runner = ParallelRun(maxthreads=64, maxqueue=1)
        abort = []

        def waitforit(guid, node):
            try:
                node.wait_provisioning(
                    (20*60 if node._node_id in self._just_provisioned else 60)
                )

                self._logger.info("READY Node %s at %s", guid, node.hostname)

                # Prepare dependency installer now
                node.prepare_dependencies()
            except:
                abort.append(None)
                raise

        for guid, node in self._elements.iteritems():
            if abort:
                break
            if isinstance(node, self._node.Node):
                self._logger.info("Waiting for Node %s configured at %s",
                    guid, node.hostname)
                runner.put(waitforit, guid, node)
        runner.join()
    except self._node.UnresponsiveNodeError:
        # Uh...
        self._logger.warn("UNRESPONSIVE Nodes")

        # Mark all dead nodes (which are unresponsive) on the blacklist
        # and re-raise
        for guid, node in self._elements.iteritems():
            if isinstance(node, self._node.Node):
                if not node.is_alive():
                    self._logger.warn("Blacklisting %s for unresponsiveness",
                        node.hostname)
                    self._blacklist.add(node.hostname)
                    node.unassign_node()

        try:
            self._save_blacklist()
        except:
            # not important...
            import traceback
            traceback.print_exc()

        raise
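
# The `abort` list above doubles as a cross-thread cancellation flag: any
# worker that fails appends to it, and the scheduling loop stops enqueuing
# new waits once it is non-empty. A minimal sketch of that pattern, with
# hypothetical `items` and `work` arguments; error propagation still rides
# on ParallelRun's own join machinery.
def _abortable_fanout(runner, items, work):
    abort = []

    def guarded(item):
        try:
            work(item)
        except:
            abort.append(None)   # signal the scheduling loop to stop
            raise                # let ParallelRun surface the error too

    for item in items:
        if abort:                # a worker already failed; stop enqueuing
            break
        runner.put(guarded, item)
    runner.join()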
def test_run_simple(self):
    runner = ParallelRun(maxthreads=4)
    runner.start()

    count = [0]

    def inc(count):
        count[0] += 1

    for x in xrange(10):
        runner.put(inc, count)

    runner.destroy()

    self.assertEquals(count[0], 10)
def test_run_interrupt(self):
    def sleep():
        import time
        time.sleep(5)

    startt = datetime.datetime.now()

    runner = ParallelRun(maxthreads=4)
    runner.start()

    for x in xrange(100):
        runner.put(sleep)

    runner.empty()
    runner.destroy()

    endt = datetime.datetime.now()
    time_elapsed = (endt - startt).seconds
    self.assertTrue(time_elapsed < 500)
def test_run_error(self):
    count = [0]

    def inc(count):
        count[0] += 1

    def error():
        raise RuntimeError()

    runner = ParallelRun(maxthreads=4)
    runner.start()

    for x in xrange(4):
        runner.put(inc, count)

    runner.put(error)
    runner.destroy()

    self.assertEquals(count[0], 4)
    self.assertRaises(RuntimeError, runner.sync)
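
# Distilled from the three tests above: the ParallelRun lifecycle as these
# tests exercise it. A minimal sketch, not part of the test suite; `tasks`
# is a hypothetical list of (callable, args) pairs, and the sync-raises
# semantics are those test_run_error asserts.
def _parallel_run_sketch(tasks):
    runner = ParallelRun(maxthreads=4)
    runner.start()                  # spin up the worker pool
    for func, args in tasks:
        runner.put(func, *args)     # enqueue work for the pool
    runner.destroy()                # drain the queue and stop the workers
    runner.sync()                   # re-raises any exception a worker hit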
def _process(self):
    """ Process scheduled tasks.

    .. note::

        Tasks are scheduled by invoking the schedule method with a target
        callback and an execution time. The schedule method creates a new
        Task object with that callback and execution time, and pushes it
        into the '_scheduler' queue. The execution time and the order of
        arrival of tasks are used to order the tasks in the queue.

        The _process method is executed in an independent thread held by
        the ExperimentController for as long as the experiment is running.
        This method takes tasks from the '_scheduler' queue in a loop and
        processes them in parallel using multithreading. The environment
        variable NEPI_NTHREADS can be used to control the number of
        threads used to process tasks. The default value is 50.

        To execute tasks in parallel, a ParallelRun (PR) object is used.
        This object keeps a pool of threads (workers), and a queue of
        tasks scheduled for 'immediate' execution.

        On each iteration, the '_process' loop will take the next task
        that is scheduled for 'future' execution from the '_scheduler'
        queue, and if the execution time of that task is <= the current
        time (i.e. the task is due), it will push that task into the PR
        for 'immediate' execution. As soon as a worker is free, the PR
        will assign the next task to that worker.

        Upon receiving a task to execute, each PR worker (thread) will
        invoke the _execute method of the EC, passing the task as
        argument. The _execute method will then invoke task.callback
        inside a try/except block. If an exception is raised by
        task.callback, it will be trapped by the try block, logged to
        standard error (usually the console), and the task will be marked
        as failed.

    """
    self._nthreads = int(
        os.environ.get("NEPI_NTHREADS", str(self._nthreads)))

    self._runner = ParallelRun(maxthreads=self.nthreads)
    self._runner.start()

    while not self._stop:
        try:
            self._cond.acquire()

            task = self._scheduler.next()

            if not task:
                # No task to execute. Wait for a new task to be scheduled.
                self._cond.wait()
            else:
                # The task timestamp is in the future. Wait for timeout
                # or until another task is scheduled.
                now = tnow()
                if now < task.timestamp:
                    # Calculate timeout in seconds
                    timeout = tdiffsec(task.timestamp, now)

                    # Re-schedule task with the same timestamp
                    self._scheduler.schedule(task)

                    task = None

                    # Wait timeout or until a new task awakes the condition
                    self._cond.wait(timeout)

            self._cond.release()

            if task:
                # Process tasks in parallel
                self._runner.put(self._execute, task)
        except:
            import traceback
            err = traceback.format_exc()
            self.logger.error(
                "Error while processing tasks in the EC: %s" % err)

            # Set the EC to FAILED state
            self._state = ECState.FAILED

            # Set the FailureManager failure level to EC failure
            self._fm.set_ec_failure()

    self.logger.debug("Exiting the task processing loop ... ")

    self._runner.sync()
    self._runner.destroy()
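
# A self-contained sketch of the pattern _process implements above: a
# time-ordered queue drained under a condition variable, with due tasks
# handed off for execution. All names here (_DemoLoop, schedule, run, stop)
# are illustrative only; the real Task, Scheduler and ParallelRun classes
# live elsewhere in nepi.
import heapq
import threading
import time

class _DemoLoop(object):
    def __init__(self):
        self._heap = []     # (timestamp, seq, callback), ordered by time
        self._seq = 0       # tie-breaker preserving arrival order
        self._cond = threading.Condition()
        self._stop = False

    def schedule(self, when, callback):
        with self._cond:
            heapq.heappush(self._heap, (when, self._seq, callback))
            self._seq += 1
            self._cond.notify()     # wake the processing loop

    def stop(self):
        with self._cond:
            self._stop = True
            self._cond.notify()

    def run(self):
        while not self._stop:
            with self._cond:
                if not self._heap:
                    self._cond.wait()               # nothing scheduled yet
                    continue
                when, _, callback = self._heap[0]
                now = time.time()
                if now < when:
                    # Not due yet: sleep until due, or until a newly
                    # scheduled task awakes the condition, then re-evaluate
                    self._cond.wait(when - now)
                    continue
                heapq.heappop(self._heap)
            # In nepi this hand-off is runner.put(self._execute, task)
            callback()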
def _do_in_factory_order(self, action, order, postaction=None, poststep=None):
    logger = self._logger

    guids = collections.defaultdict(list)

    # order guids (elements) according to factory_id
    for guid, factory_id in self._create.iteritems():
        guids[factory_id].append(guid)

    # configure elements following the factory_id order
    for factory_id in order:
        # Create a parallel runner if we're given a Parallel() wrapper
        runner = None
        if isinstance(factory_id, Parallel):
            runner = ParallelRun(factory_id.maxthreads)
            factory_id = factory_id.factory

        # omit the factories that have no element to create
        if factory_id not in guids:
            continue

        # configure action
        factory = self._factories[factory_id]
        if isinstance(action, basestring) and not getattr(factory, action):
            continue

        def perform_action(guid):
            if isinstance(action, basestring):
                getattr(factory, action)(self, guid)
            else:
                action(self, guid)
            if postaction:
                postaction(self, guid)

        # perform the action on all elements, in parallel if so requested
        if runner:
            logger.debug("TestbedController: Starting parallel %s", action)
            runner.start()

        for guid in guids[factory_id]:
            if runner:
                logger.debug("TestbedController: Scheduling %s on %s",
                    action, guid)
                runner.put(perform_action, guid)
            else:
                logger.debug("TestbedController: Performing %s on %s",
                    action, guid)
                perform_action(guid)

        # sync
        if runner:
            runner.sync()

        # post hook
        if poststep:
            for guid in guids[factory_id]:
                if runner:
                    logger.debug(
                        "TestbedController: Scheduling post-%s on %s",
                        action, guid)
                    runner.put(poststep, self, guid)
                else:
                    logger.debug(
                        "TestbedController: Performing post-%s on %s",
                        action, guid)
                    poststep(self, guid)

        # sync
        if runner:
            runner.join()
            logger.debug("TestbedController: Finished parallel %s", action)
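
# Illustrative only (not part of the original module): what an `order` list
# mixing plain factory ids and Parallel() wrappers might look like when fed
# to _do_in_factory_order. The factory ids are made up, and the Parallel
# constructor is assumed to store its arguments as the .factory and
# .maxthreads attributes that the method above reads.
#
#     order = [
#         "Internet",                       # run sequentially
#         Parallel("Node", maxthreads=8),   # run with 8 worker threads
#         "NodeInterface",
#     ]
#     self._do_in_factory_order("do_create", order)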
def do_resource_discovery(self, recover=False):
    to_provision = self._to_provision = set()

    reserved = set(self._blacklist)
    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node) and node._node_id is not None:
            reserved.add(node.hostname)

    # Initial algo:
    #   look for perfectly defined nodes
    #   (ie: those with only one candidate)
    reserve_lock = threading.RLock()

    def assignifunique(guid, node):
        # Try existing nodes first
        # If we have only one candidate, simply use it
        candidates = node.find_candidates(
            filter_slice_id=self.slice_id)

        node_id = None
        candidate_hosts = set(candidates.keys() if candidates else [])

        reserve_lock.acquire()
        try:
            candidate_hosts -= reserved
            if len(candidate_hosts) == 1:
                hostname = iter(candidate_hosts).next()
                node_id = candidates[hostname]
                reserved.add(hostname)
            elif not candidate_hosts:
                # Try again including unassigned nodes
                reserve_lock.release()
                try:
                    candidates = node.find_candidates()
                finally:
                    reserve_lock.acquire()

                candidate_hosts = set(candidates.keys() if candidates else [])
                candidate_hosts -= reserved

                if len(candidate_hosts) > 1:
                    return
                if len(candidate_hosts) == 1:
                    hostname = iter(candidate_hosts).next()
                    node_id = candidates[hostname]
                    to_provision.add(node_id)
                    reserved.add(hostname)
                elif not candidates:
                    raise RuntimeError(
                        "Cannot assign resources for node %s, "
                        "no candidates with %s" % (
                            guid, node.make_filter_description()))
        finally:
            reserve_lock.release()

        if node_id is not None:
            node.assign_node_id(node_id)

    # don't overload the PLC API, just 4 threads to hide latencies
    # and that's it
    runner = ParallelRun(maxthreads=4)
    runner.start()
    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node) and node._node_id is None:
            runner.put(assignifunique, guid, node)
    runner.sync()

    # Now do the backtracking search for a suitable solution
    # First with existing slice nodes
    reqs = []
    nodes = []

    def genreqs(node, filter_slice_id=None):
        # Try existing nodes first
        # If we have only one candidate, simply use it
        candidates = node.find_candidates(
            filter_slice_id=filter_slice_id)
        for r in reserved:
            if r in candidates:
                del candidates[r]
        reqs.append(candidates.values())
        nodes.append(node)

    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node) and node._node_id is None:
            runner.put(genreqs, node, self.slice_id)
    runner.sync()

    if nodes and reqs:
        if recover:
            raise RuntimeError(
                "Impossible to recover: unassigned host for Nodes %r"
                % (nodes,))

        def pickbest(fullset, nreq, node=nodes[0]):
            if len(fullset) > nreq:
                fullset = zip(node.rate_nodes(fullset), fullset)
                fullset.sort(reverse=True)
                del fullset[nreq:]
                return set(map(operator.itemgetter(1), fullset))
            else:
                return fullset

        try:
            solution = resourcealloc.alloc(reqs, sample=pickbest)
        except resourcealloc.ResourceAllocationError:
            # Failed, try again with all nodes
            reqs = []
            for node in nodes:
                runner.put(genreqs, node)
            runner.sync()
            solution = resourcealloc.alloc(reqs, sample=pickbest)

        to_provision.update(solution)

        # Do assign nodes
        for node, node_id in zip(nodes, solution):
            runner.put(node.assign_node_id, node_id)

    runner.join()
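
# Worth calling out from assignifunique above: the slow find_candidates()
# RPC runs with the lock released, and the shared reservation set is only
# consulted once the lock is (re-)taken. A minimal sketch of that pattern,
# with made-up names (slow_lookup, _reserved); the real code additionally
# retries the lookup without the slice filter.
import threading

_reserve_lock = threading.RLock()
_reserved = set()

def _reserve_unique(slow_lookup):
    candidates = set(slow_lookup())      # slow call, lock NOT held
    with _reserve_lock:
        candidates -= _reserved          # re-check against current state
        if len(candidates) == 1:
            pick = iter(candidates).next()
            _reserved.add(pick)          # claim it before releasing the lock
            return pick
    return None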