def state(self):
    """ Returns the state of the application.

    While the application is STARTED, check_state_connection() is invoked
    to refresh the state, but at most once every 'state_check_delay'
    seconds, so that neither the remote host nor the local processor is
    overwhelmed with queries.
    """
    if self._state != ResourceState.STARTED:
        return self._state

    seconds_since_check = tdiffsec(tnow(), self._last_state_check)
    if seconds_since_check > state_check_delay:
        self.check_state_connection()
        self._last_state_check = tnow()

    return self._state
def do_start(self):
    """ Start the RM.

    Sends the XMPP message, using the OMF protocol, that executes the
    application. With OMF 5 the command is executed directly. With OMF 6
    the application is configured to the "running" state on the first
    attempt, and the start is re-scheduled until check_start() confirms
    it or the confirmation counter is exceeded.

    Raises RuntimeError when the OMF 6 confirmation never arrives.
    """
    ## For performance test
    if self.sperf:
        self.begin_start_time = tnow()
        self.sperf = False

    if not self.get('env'):
        self.set('env', " ")

    if self.get('version') == "5":
        self.begin_start_time = tnow()

        # Log the pieces of the command that is sent to OMF 5
        header = " " + self.get_rtype() + " ( Guid : " + str(self._guid) + ") : "
        details = self.get('appid') + " : " + self._path + " : " + \
            self._args + " : " + self.get('env')
        self.debug(header + details)

        self._omf_api.execute(self.node.get('hostname'), self.get('appid'),
                              self._args, self._path, self.get('env'))
    else:
        # OMF 6: the configure request is only sent on the first attempt
        if self._start_cnt == 0:
            props = {'state': "running"}
            guards = {'type': "application", 'name': self.get('command')}
            self._omf_api.frcp_configure(self._topic_app, props=props,
                                         guards=guards)

        if self._start_cnt > confirmation_counter:
            msg = "Couldn't retrieve the confirmation that the application started"
            self.error(msg)
            raise RuntimeError(msg)

        confirmed = self.check_start(self._topic_app)
        if not confirmed:
            # Not confirmed yet: count the attempt and try again later
            self._start_cnt += 1
            self.ec.schedule(reschedule_check, self.start)
            return

    super(OMFApplication, self).do_start()
def state(self):
    """ Returns the state of the application.

    Nothing is checked unless the application is STARTED. A foreground
    command is polled through its local process handler. A background
    command is queried on the node, at most once every 0.5 seconds, to
    avoid overwhelming the remote host and the local processor with too
    many ssh queries.
    """
    if self._state != ResourceState.STARTED:
        return self._state

    if self.in_foreground:
        # Poll the process that was used to execute the command:
        #   None -> still running
        #   0    -> finished
        #   else -> error
        exit_code = self._proc.poll()

        if exit_code:
            out = ""
            msg = " Failed to execute command '%s'" % self.get("command")
            err = self._proc.stderr.read()
            self.error(msg, out, err)
            self.do_fail()
        elif exit_code == 0:
            self.set_stopped()

        return self._state

    # Background command: throttle the remote status queries
    poll_interval = 0.5
    if tdiffsec(tnow(), self._last_state_check) > poll_interval:
        if self.pid and self.ppid:
            # Make sure the process is still running in background
            proc_status = self.node.status(self.pid, self.ppid)

            if proc_status == ProcStatus.FINISHED:
                # The program finished; look for execution errors
                (out, err), proc = self.node.check_errors(self.run_home)

                if not err:
                    self.set_stopped()
                else:
                    msg = "Failed to execute command '%s'" % self.get("command")
                    self.error(msg, out, err)
                    self.do_fail()

        self._last_state_check = tnow()

    return self._state
def state(self):
    """ Returns the state of the device.

    While STARTED, and at most once every 0.5 seconds, the output of
    'ifconfig' executed on the node is searched for the configured
    'deviceName'. If the name is absent the resource is set to stopped.
    """
    poll_interval = 0.5
    started = self._state == ResourceState.STARTED

    if started and tdiffsec(tnow(), self._last_state_check) > poll_interval:
        if self.get("deviceName"):
            (listing, _err), _proc = self.node.execute("ifconfig")

            if listing.strip().find(self.get("deviceName")) < 0:
                # the device is no longer listed, so it is not running
                self.set_stopped()

        self._last_state_check = tnow()

    return self._state
def state(self):
    """ Returns the state of the device.

    While STARTED, and at most once every 0.5 seconds, the output of
    'ip a' executed on the node is searched for the configured
    'deviceName'. If the name is absent the resource is set to stopped.
    """
    poll_interval = 0.5
    started = self._state == ResourceState.STARTED

    if started and tdiffsec(tnow(), self._last_state_check) > poll_interval:
        if self.get("deviceName"):
            (listing, _err), _proc = self.node.execute("ip a")

            if listing.strip().find(self.get("deviceName")) < 0:
                # the device is no longer listed, so it is not running
                self.set_stopped()

        self._last_state_check = tnow()

    return self._state
def test_task_order(self):
    """ Tasks scheduled out of order must come back ordered by timestamp.

    Three tasks are created with timestamps tnow(), stabsformat("2s") and
    stabsformat("3s") (presumably "now", "now + 2s" and "now + 3s" --
    confirm against stabsformat), pushed into a HeapScheduler in a
    shuffled order, and then retrieved one by one.
    """
    def first():
        return 1

    def second():
        return 2

    def third():
        return 3

    scheduler = HeapScheduler()

    t1 = tnow()
    t2 = stabsformat("2s")
    t3 = stabsformat("3s")

    tsk1 = Task(t1, first)
    tsk2 = Task(t2, second)
    tsk3 = Task(t3, third)

    # Schedule the tasks in disorder
    scheduler.schedule(tsk2)
    scheduler.schedule(tsk3)
    scheduler.schedule(tsk1)

    # Make sure tasks are retrieved in the correct order.
    # assertEqual is used instead of the deprecated alias assertEquals,
    # which newer unittest versions no longer provide.
    tsk = scheduler.next()
    self.assertEqual(tsk.callback(), 1)

    tsk = scheduler.next()
    self.assertEqual(tsk.callback(), 2)

    tsk = scheduler.next()
    self.assertEqual(tsk.callback(), 3)
def do_start(self):
    """ Start the RM.

    Sends the XMPP message, using the OMF protocol, that executes the
    application. OMF 5 executes the command directly; OMF 6 configures
    the application as "running" once and then re-schedules itself until
    check_start() confirms the start or the confirmation counter is
    exceeded.

    Raises RuntimeError when the OMF 6 confirmation never arrives.
    """
    ## For performance test
    if self.sperf:
        self.begin_start_time = tnow()
        self.sperf = False

    if not self.get('env'):
        self.set('env', " ")

    if self.get('version') == "5":
        self.begin_start_time = tnow()

        # Trace what is about to be sent to OMF 5
        prefix = " " + self.get_rtype() + " ( Guid : " + str(self._guid) + ") : "
        command_info = self.get('appid') + " : " + self._path + " : " + \
            self._args + " : " + self.get('env')
        self.debug(prefix + command_info)

        self._omf_api.execute(self.node.get('hostname'), self.get('appid'),
                              self._args, self._path, self.get('env'))
    else:
        # OMF 6: only the very first attempt sends the configure request
        if self._start_cnt == 0:
            props = {'state': "running"}
            guards = {'type': "application", 'name': self.get('command')}
            self._omf_api.frcp_configure(self._topic_app, props=props,
                                         guards=guards)

        if self._start_cnt > confirmation_counter:
            msg = "Couldn't retrieve the confirmation that the application started"
            self.error(msg)
            raise RuntimeError(msg)

        if not self.check_start(self._topic_app):
            # No confirmation yet: count the attempt and check again later
            self._start_cnt += 1
            self.ec.schedule(reschedule_check, self.start)
            return

    super(OMFApplication, self).do_start()
def set_state(self, state, state_time_attr, time=None):
    """ Set the state of the RM while keeping a trace of the time.

    The request is ignored once the RM is RELEASED, so that the state of
    a released RM never changes again. When no (or a falsy) 'time' is
    given, the current time from tnow() is recorded.
    """
    if self._state == ResourceState.RELEASED:
        return

    if not time:
        time = tnow()

    self.set_state_time(state, state_time_attr, time)
def state(self):
    """ Returns the state of the resource, checking first if ccnd failed.

    While STARTED, and at most once every 0.5 seconds, the result of
    _ccndstatus() is inspected: a return code of 1 together with a
    "No such file or directory" error marks the resource as stopped,
    any other non-zero return code marks it as failed.
    """
    poll_interval = 0.5
    started = self._state == ResourceState.STARTED

    if started and tdiffsec(tnow(), self._last_state_check) > poll_interval:
        (out, err), proc = self._ccndstatus()
        exit_code = proc.poll()

        socket_missing = exit_code == 1 and \
            err.find("No such file or directory") > -1

        if socket_missing:
            # ccnd is not running (socket not found)
            self.set_stopped()
        elif exit_code:
            # any other error
            msg = " Failed to execute command '%s'" % self.get("command")
            self.error(msg, out, err)
            self.fail()

        self._last_state_check = tnow()

    return self._state
def do_deploy(self):
    """ Deploy the RM.

    Gets the xmpp client and sends messages using the OMF 5.4 or 6
    protocol to configure the channel. With OMF 6 only the frequency is
    computed locally. Otherwise every target returned by _get_target()
    gets its 'channel' attribute configured, or the deploy is
    re-scheduled when the targets are not available yet.

    Raises RuntimeError when 'channel' or 'xmppServer' is not set.
    """
    ## For performance test
    if self.perf:
        self.begin_deploy_time = tnow()
        self.perf = False

    if not self.get('channel'):
        msg = "Channel's value is not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if self.get('version') == "6":
        self.frequency = self.get_frequency(self.get('channel'))
        super(OMFChannel, self).do_deploy()
        return

    if not self.get('xmppServer'):
        msg = "XmppServer is not initialzed. XMPP Connections impossible"
        self.error(msg)
        raise RuntimeError(msg)

    # NOTE(review): this only warns when all three credentials are unset,
    # although the message reads as "any unset" -- confirm the intent.
    has_credentials = (self.get('xmppUser') or self.get('xmppPort')
                       or self.get('xmppPassword'))
    if not has_credentials:
        msg = "Credentials are not all initialzed. Default values will be used"
        self.warn(msg)

    if not self._omf_api:
        self._omf_api = OMFAPIFactory.get_api(
            self.get('version'),
            self.get('xmppServer'),
            self.get('xmppUser'),
            self.get('xmppPort'),
            self.get('xmppPassword'),
            exp_id=self.exp_id)

    self._nodes_guid = self._get_target(self._connections)

    if self._nodes_guid == "reschedule":
        # Targets are not ready yet: try again in a second
        self.ec.schedule("1s", self.deploy)
        return

    for target in self._nodes_guid:
        attrval = self.get('channel')
        attrname = "net/%s/%s" % (target[1], 'channel')
        self._omf_api.configure(target[0], attrname, attrval)

    super(OMFChannel, self).do_deploy()
def do_deploy(self):
    """ Deploy the RM.

    Gets the xmpp client and sends messages using the OMF 5.4 or 6
    protocol to configure the channel. With OMF 6 only the frequency is
    computed locally. Otherwise the 'channel' attribute is configured on
    every target returned by _get_target(), or the deploy is
    re-scheduled when the targets are not available yet.

    Raises RuntimeError when 'channel' or 'xmppServer' is not set.
    """
    ## For performance test
    if self.perf:
        self.begin_deploy_time = tnow()
        self.perf = False

    if not self.get('channel'):
        msg = "Channel's value is not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if self.get('version') == "6":
        self.frequency = self.get_frequency(self.get('channel'))
        super(OMFChannel, self).do_deploy()
        return

    if not self.get('xmppServer'):
        msg = "XmppServer is not initialzed. XMPP Connections impossible"
        self.error(msg)
        raise RuntimeError(msg)

    # NOTE(review): this only warns when all three credentials are unset,
    # although the message reads as "any unset" -- confirm the intent.
    any_credential = (self.get('xmppUser') or self.get('xmppPort')
                      or self.get('xmppPassword'))
    if not any_credential:
        msg = "Credentials are not all initialzed. Default values will be used"
        self.warn(msg)

    if not self._omf_api:
        self._omf_api = OMFAPIFactory.get_api(
            self.get('version'),
            self.get('xmppServer'),
            self.get('xmppUser'),
            self.get('xmppPort'),
            self.get('xmppPassword'),
            exp_id=self.exp_id)

    self._nodes_guid = self._get_target(self._connections)

    if self._nodes_guid == "reschedule":
        # Targets are not ready yet: try again in a second
        self.ec.schedule("1s", self.deploy)
        return

    for pair in self._nodes_guid:
        attrval = self.get('channel')
        attrname = "net/%s/%s" % (pair[1], 'channel')
        self._omf_api.configure(pair[0], attrname, attrval)

    super(OMFChannel, self).do_deploy()
def do_deploy(self):
    """ Deploy the RM.

    Sends the XMPP message, using the OMF protocol, that enrolls the node
    into the experiment: enroll_host() for OMF 5, enroll_topic()
    otherwise.

    Raises RuntimeError when 'xmppServer', 'version' or 'hostname' is
    not set.
    """
    ## For performance test
    if self.perf:
        self.begin_deploy_time = tnow()
        self.perf = False

    # Mandatory attributes, checked in this order
    mandatory = (
        ('xmppServer',
         "XmppServer is not initialzed. XMPP Connections impossible"),
        ('version', "Version of OMF is not indicated"),
    )
    for attr, msg in mandatory:
        if not self.get(attr):
            self.error(msg)
            raise RuntimeError(msg)

    # NOTE(review): this only warns when all three credentials are unset,
    # although the message reads as "any unset" -- confirm the intent.
    has_credentials = (self.get('xmppUser') or self.get('xmppPort')
                       or self.get('xmppPassword'))
    if not has_credentials:
        msg = "Credentials are not all initialzed. Default values will be used"
        self.warn(msg)

    if not self._omf_api:
        self._omf_api = OMFAPIFactory.get_api(
            self.get('version'),
            self.get('xmppServer'),
            self.get('xmppUser'),
            self.get('xmppPort'),
            self.get('xmppPassword'),
            exp_id=self.exp_id)

    if not self.get('hostname'):
        msg = "Hostname's value is not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if self.get('version') == "5":
        self._omf_api.enroll_host(self.get('hostname'))
    else:
        self._omf_api.enroll_topic(self.get('hostname'))

    super(OMFNode, self).do_deploy()
def do_release(self):
    """ Clean the RM at the end of the experiment and release the API.

    With OMF 6 and an existing application topic, a release request is
    sent once and the method re-schedules itself until check_release()
    confirms it or 'confirmation_counter' attempts were made. The
    application's stdout/stderr files under /tmp are then removed and
    the OMF API is released.
    """
    ## For performance test
    if self.rperf:
        self.begin_release_time = tnow()
        self.rperf = False

    if self._omf_api:
        if self.get('version') == "6" and self._topic_app:
            # The release request is only sent the first time around
            if not self.release_id:
                self.release_id = os.urandom(16).encode('hex')
                self._omf_api.frcp_release(self.release_id,
                                           self.node.get('hostname'),
                                           self._topic_app,
                                           res_id=self._topic_app)

            if self._release_cnt < confirmation_counter:
                cid = self.check_release(self.release_id)
                if not cid:
                    # Not confirmed yet: count the attempt and retry later
                    self._release_cnt += 1
                    self.ec.schedule(reschedule_check, self.release)
                    return
            else:
                msg = "Couldn't retrieve the confirmation of the release"
                self.error(msg)

            # Remove the stdout and stderr of the application. Each file
            # is removed independently, so that a missing '.out' file
            # does not leave the '.err' file behind.
            for suffix in ('.out', '.err'):
                try:
                    os.remove('/tmp/' + self._topic_app + suffix)
                except OSError:
                    # Best effort: the file may never have been created
                    pass

        OMFAPIFactory.release_api(self.get('version'),
                                  self.get('xmppServer'),
                                  self.get('xmppUser'),
                                  self.get('xmppPort'),
                                  self.get('xmppPassword'),
                                  exp_id=self.exp_id)

    super(OMFApplication, self).do_release()
def __init__(self, ec, guid):
    """ Initialise the application RM with its default bookkeeping. """
    super(LinuxApplication, self).__init__(ec, guid)

    # Process identifiers of the launched command (unknown until started)
    self._pid = None
    self._ppid = None

    # Handler of the running process; only kept when the command is not
    # executed as a remote daemon in background
    self._proc = None

    self._node = None
    self._home = "app-%s" % self.guid

    # True when the command must run in foreground, attached to a terminal
    self._in_foreground = False

    # True when sudo must be used to kill the application process
    self._sudo_kill = False

    # Timestamp of the last state check of the application
    self._last_state_check = tnow()
def do_deploy(self):
    """ Deploy the RM.

    Sends the XMPP message, using the OMF protocol, that enrolls the node
    into the experiment: enroll_host() for OMF 5, enroll_topic()
    otherwise.

    Raises RuntimeError when 'xmppServer', 'version' or 'hostname' is
    not set.
    """
    ## For performance test
    if self.perf:
        self.begin_deploy_time = tnow()
        self.perf = False

    # Mandatory attributes, checked in this order
    required = (
        ('xmppServer',
         "XmppServer is not initialzed. XMPP Connections impossible"),
        ('version', "Version of OMF is not indicated"),
    )
    for name, msg in required:
        if not self.get(name):
            self.error(msg)
            raise RuntimeError(msg)

    # NOTE(review): this only warns when all three credentials are unset,
    # although the message reads as "any unset" -- confirm the intent.
    any_credential = (self.get('xmppUser') or self.get('xmppPort')
                      or self.get('xmppPassword'))
    if not any_credential:
        msg = "Credentials are not all initialzed. Default values will be used"
        self.warn(msg)

    if not self._omf_api:
        self._omf_api = OMFAPIFactory.get_api(
            self.get('version'),
            self.get('xmppServer'),
            self.get('xmppUser'),
            self.get('xmppPort'),
            self.get('xmppPassword'),
            exp_id=self.exp_id)

    if not self.get('hostname'):
        msg = "Hostname's value is not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if self.get('version') == "5":
        self._omf_api.enroll_host(self.get('hostname'))
    else:
        self._omf_api.enroll_topic(self.get('hostname'))

    super(OMFNode, self).do_deploy()
def do_release(self):
    """ Clean the RM at the end of the experiment and release the API.

    With OMF 6 and an existing application topic, a release request is
    sent once and the method re-schedules itself until check_release()
    confirms it or 'confirmation_counter' attempts were made. The
    application's stdout/stderr files under /tmp are then removed and
    the OMF API is released.
    """
    ## For performance test
    if self.rperf:
        self.begin_release_time = tnow()
        self.rperf = False

    if self._omf_api:
        if self.get('version') == "6" and self._topic_app:
            # The release request is only sent the first time around
            if not self.release_id:
                self.release_id = os.urandom(16).encode('hex')
                self._omf_api.frcp_release(self.release_id,
                                           self.node.get('hostname'),
                                           self._topic_app,
                                           res_id=self._topic_app)

            if self._release_cnt < confirmation_counter:
                cid = self.check_release(self.release_id)
                if not cid:
                    # Not confirmed yet: count the attempt and retry later
                    self._release_cnt += 1
                    self.ec.schedule(reschedule_check, self.release)
                    return
            else:
                msg = "Couldn't retrieve the confirmation of the release"
                self.error(msg)

            # Remove the stdout and stderr of the application. Each file
            # is removed independently, so that a missing '.out' file
            # does not leave the '.err' file behind.
            for suffix in ('.out', '.err'):
                try:
                    os.remove('/tmp/' + self._topic_app + suffix)
                except OSError:
                    # Best effort: the file may never have been created
                    pass

        OMFAPIFactory.release_api(self.get('version'),
                                  self.get('xmppServer'),
                                  self.get('xmppUser'),
                                  self.get('xmppPort'),
                                  self.get('xmppPassword'),
                                  exp_id=self.exp_id)

    super(OMFApplication, self).do_release()
def _process(self):
    """ Process scheduled tasks.

    .. note::

        Tasks are scheduled by invoking the schedule method with a target
        callback and an execution time. The schedule method creates a new
        Task object with that callback and execution time, and pushes it
        into the '_scheduler' queue.

        This method takes tasks from the '_scheduler' queue in a loop,
        for as long as '_stop' is not set, and hands them to a
        ParallelRun runner so that they are processed in parallel.

        The environmental variable NEPI_NTHREADS can be used to control
        the number of threads used to process tasks. When it is not set,
        the value already stored in '_nthreads' is kept.

        On each iteration the loop takes the next task from the
        '_scheduler' queue. If the execution time of that task is <= the
        current time, the task is pushed into the runner together with
        the '_execute' method for immediate execution. If the execution
        time is still in the future, the task is put back into the queue
        and the loop waits on the condition until the timeout expires or
        the condition is notified.

        Any exception raised inside the loop is logged, the EC is set to
        the FAILED state and the failure manager is notified.

    """
    self._nthreads = int(
        os.environ.get("NEPI_NTHREADS", str(self._nthreads)))

    self._runner = ParallelRun(maxthreads=self.nthreads)
    self._runner.start()

    while not self._stop:
        try:
            # The condition is held through 'with' so that it is always
            # released, even when the scheduler raises. A leaked lock
            # would block every other thread that uses this condition.
            with self._cond:
                task = self._scheduler.next()

                if not task:
                    # No task to execute. Wait for a new task to be
                    # scheduled.
                    self._cond.wait()
                else:
                    now = tnow()
                    if now < task.timestamp:
                        # The task timestamp is in the future.
                        # Calculate timeout in seconds
                        timeout = tdiffsec(task.timestamp, now)

                        # Re-schedule task with the same timestamp
                        self._scheduler.schedule(task)
                        task = None

                        # Wait timeout or until a new task awakes the
                        # condition
                        self._cond.wait(timeout)

            if task:
                # Process tasks in parallel (outside of the lock)
                self._runner.put(self._execute, task)
        except:
            # Deliberately broad: this is the top-level boundary of the
            # processing loop and every failure must flag the EC.
            import traceback
            err = traceback.format_exc()
            self.logger.error(
                "Error while processing tasks in the EC: %s" % err)

            # Set the EC to FAILED state
            self._state = ECState.FAILED

            # Set the FailureManager failure level to EC failure
            self._fm.set_ec_failure()

    self.logger.debug("Exiting the task processing loop ... ")

    self._runner.sync()
    self._runner.destroy()
def do_deploy(self):
    """ Deploy the RM.

    Gets the xmpp client and sends messages using the OMF 5.4 or 6
    protocol to configure the interface. The deploy is re-scheduled
    while the node or the channel is missing or not yet READY. The XMPP
    settings and the OMF version are copied from the node.

    Raises RuntimeError when 'xmppServer', 'name', or any of 'mode',
    'essid', 'hw_mode' and 'ip' is not set.
    """
    # Wait for the node. It may still be unset at this point, so its
    # state is only read when the node actually exists.
    node = self.node
    if not node or node.state < ResourceState.READY:
        node_state = node.state if node else None
        self.debug("---- RESCHEDULING DEPLOY ---- node state %s " % node_state)
        self.ec.schedule(self.reschedule_delay, self.deploy)
        return

    # Same for the channel
    channel = self.channel
    if not channel or channel.state < ResourceState.READY:
        channel_state = channel.state if channel else None
        self.debug("---- RESCHEDULING DEPLOY ---- channel state %s " % channel_state)
        self.ec.schedule(self.reschedule_delay, self.deploy)
        return

    ## For performance test
    if self.perf:
        self.begin_deploy_time = tnow()
        self.perf = False

    # Inherit the XMPP settings and the OMF version from the node
    for attr in ('xmppUser', 'xmppServer', 'xmppPort', 'xmppPassword',
                 'version'):
        self.set(attr, self.node.get(attr))

    if not self.get('xmppServer'):
        msg = "XmppServer is not initialzed. XMPP Connections impossible"
        self.error(msg)
        raise RuntimeError(msg)

    # NOTE(review): this only warns when all three credentials are unset,
    # although the message reads as "any unset" -- confirm the intent.
    if not (self.get('xmppUser') or self.get('xmppPort')
            or self.get('xmppPassword')):
        msg = "Credentials are not all initialzed. Default values will be used"
        self.warn(msg)

    if not self._omf_api:
        self._omf_api = OMFAPIFactory.get_api(self.get('version'),
                                              self.get('xmppServer'),
                                              self.get('xmppUser'),
                                              self.get('xmppPort'),
                                              self.get('xmppPassword'),
                                              exp_id=self.exp_id)

    if not self.get('name'):
        msg = "Interface's name is not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if not (self.get('mode') and self.get('essid')
            and self.get('hw_mode') and self.get('ip')):
        msg = "Interface's variable are not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if self.get('version') == "5":
        res = self.configure_on_omf5()
    else:
        res = self.configure_on_omf6()

    # The parent deploy only runs once the configuration reported success
    if res:
        super(OMFWifiInterface, self).do_deploy()
def _needs_reschedule(self, group, state, time): """ Internal method that verify if 'time' has elapsed since all elements in 'group' have reached state 'state'. :param group: Group of RMs to wait for (list of guids) :type group: int or list of int :param state: State to wait for on all RM in group. (either 'STARTED', 'STOPPED' or 'READY') :type state: str :param time: Time to wait after 'state' is reached on all RMs in group. (e.g. '2s') :type time: str .. note : time should be written like "2s" or "3m" with s for seconds, m for minutes, h for hours, ... If for example, you need to wait 2min 30sec, time could be "150s" or "2.5m". For the moment, 2m30s is not a correct syntax. """ reschedule = False delay = self.reschedule_delay # check state and time elapsed on all RMs for guid in group: rm = self.ec.get_resource(guid) # If one of the RMs this resource needs to wait for has FAILED # and is critical we raise an exception if rm.state == ResourceState.FAILED: if not rm.get('critical'): continue msg = "Resource can not wait for FAILED RM %d. Setting Resource to FAILED" raise RuntimeError, msg # If the RM state is lower than the requested state we must # reschedule (e.g. if RM is READY but we required STARTED). if rm.state < state: reschedule = True break # If there is a time restriction, we must verify the # restriction is satisfied if time: if state == ResourceState.DISCOVERED: t = rm.discover_time if state == ResourceState.PROVISIONED: t = rm.provision_time elif state == ResourceState.READY: t = rm.ready_time elif state == ResourceState.STARTED: t = rm.start_time elif state == ResourceState.STOPPED: t = rm.stop_time elif state == ResourceState.RELEASED: t = rm.release_time else: break # time already elapsed since RM changed state waited = "%fs" % tdiffsec(tnow(), t) # time still to wait wait = tdiffsec(stabsformat(time), stabsformat(waited)) if wait > 0.001: reschedule = True delay = "%fs" % wait break return reschedule, delay
def do_deploy(self):
    """ Deploy the RM.

    The deploy is re-scheduled while the node is missing or not yet
    READY. The XMPP settings and the OMF version are copied from the
    node. With OMF 5 the 'sources', if any, are copied to the gateway.
    With OMF 6 the creation of the application is requested once, and
    the deploy is re-scheduled until check_deploy() returns the topic
    of the application, which is then enrolled.

    Raises RuntimeError when 'xmppServer' or 'command' is not set, or
    when the OMF 6 creation is never confirmed.
    """
    # Wait for the node. It may still be unset at this point, so its
    # state is only read when the node actually exists.
    node = self.node
    if not node or node.state < ResourceState.READY:
        node_state = node.state if node else None
        self.debug("---- RESCHEDULING DEPLOY ---- node state %s " % node_state)
        self.ec.schedule(self.reschedule_delay, self.deploy)
        return

    ## For performance test
    if self.dperf:
        self.begin_deploy_time = tnow()
        self.dperf = False

    self._init_command()

    # Inherit the XMPP settings and the OMF version from the node
    for attr in ('xmppUser', 'xmppServer', 'xmppPort', 'xmppPassword',
                 'version'):
        self.set(attr, self.node.get(attr))

    if not self.get('xmppServer'):
        msg = "XmppServer is not initialzed. XMPP Connections impossible"
        self.error(msg)
        raise RuntimeError(msg)

    # NOTE(review): this only warns when all three credentials are unset,
    # although the message reads as "any unset" -- confirm the intent.
    if not (self.get('xmppUser') or self.get('xmppPort')
            or self.get('xmppPassword')):
        msg = "Credentials are not all initialzed. Default values will be used"
        self.warn(msg)

    if not self.get('command'):
        msg = "Application's Command is not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if not self._omf_api:
        self._omf_api = OMFAPIFactory.get_api(self.get('version'),
                                              self.get('xmppServer'),
                                              self.get('xmppUser'),
                                              self.get('xmppPort'),
                                              self.get('xmppPassword'),
                                              exp_id=self.exp_id)

    if self.get('version') == "5":
        self.begin_deploy_time = tnow()

        if self.get('sources'):
            gateway = ResourceGateway.AMtoGateway[self.get('xmppServer')]
            user = self.get('sshUser') or self.get('xmppUser')
            dst = user + "@" + gateway + ":"
            # NOTE(review): the outcome of the copy is not checked
            sshfuncs.rcopy(self.get('sources'), dst)
    else:
        # For OMF 6: the creation request is only sent once
        if not self.create_id:
            props = {}
            if self.get('command'):
                props['application:binary_path'] = self.get('command')
                props['application:hrn'] = self.get('command')
                props['application:membership'] = self._topic_app
            props['application:type'] = "application"

            self.create_id = os.urandom(16).encode('hex')
            self._omf_api.frcp_create(self.create_id,
                                      self.node.get('hostname'),
                                      "application", props=props)

        if self._create_cnt > confirmation_counter:
            msg = "Couldn't retrieve the confirmation of the creation"
            self.error(msg)
            raise RuntimeError(msg)

        uid = self.check_deploy(self.create_id)
        if not uid:
            # Not confirmed yet: count the attempt and retry later
            self._create_cnt += 1
            self.ec.schedule(reschedule_check, self.deploy)
            return

        self._topic_app = uid
        self._omf_api.enroll_topic(self._topic_app)

    super(OMFApplication, self).do_deploy()
def do_deploy(self):
    """ Deploy the RM.

    The deploy is re-scheduled while the node is missing or not yet
    READY. The XMPP settings and the OMF version are copied from the
    node. With OMF 5 the 'sources', if any, are copied to the gateway.
    With OMF 6 the creation of the application is requested once, and
    the deploy is re-scheduled until check_deploy() returns the topic
    of the application, which is then enrolled.

    Raises RuntimeError when 'xmppServer' or 'command' is not set, or
    when the OMF 6 creation is never confirmed.
    """
    # Wait for the node. It may still be unset at this point, so its
    # state is only read when the node actually exists.
    node = self.node
    if not node or node.state < ResourceState.READY:
        node_state = node.state if node else None
        self.debug("---- RESCHEDULING DEPLOY ---- node state %s " % node_state)
        self.ec.schedule(self.reschedule_delay, self.deploy)
        return

    ## For performance test
    if self.dperf:
        self.begin_deploy_time = tnow()
        self.dperf = False

    self._init_command()

    # Inherit the XMPP settings and the OMF version from the node
    for attr in ('xmppUser', 'xmppServer', 'xmppPort', 'xmppPassword',
                 'version'):
        self.set(attr, self.node.get(attr))

    if not self.get('xmppServer'):
        msg = "XmppServer is not initialzed. XMPP Connections impossible"
        self.error(msg)
        raise RuntimeError(msg)

    # NOTE(review): this only warns when all three credentials are unset,
    # although the message reads as "any unset" -- confirm the intent.
    if not (self.get('xmppUser') or self.get('xmppPort')
            or self.get('xmppPassword')):
        msg = "Credentials are not all initialzed. Default values will be used"
        self.warn(msg)

    if not self.get('command'):
        msg = "Application's Command is not initialized"
        self.error(msg)
        raise RuntimeError(msg)

    if not self._omf_api:
        self._omf_api = OMFAPIFactory.get_api(self.get('version'),
                                              self.get('xmppServer'),
                                              self.get('xmppUser'),
                                              self.get('xmppPort'),
                                              self.get('xmppPassword'),
                                              exp_id=self.exp_id)

    if self.get('version') == "5":
        self.begin_deploy_time = tnow()

        if self.get('sources'):
            gateway = ResourceGateway.AMtoGateway[self.get('xmppServer')]
            user = self.get('sshUser') or self.get('xmppUser')
            dst = user + "@" + gateway + ":"
            # NOTE(review): the outcome of the copy is not checked
            sshfuncs.rcopy(self.get('sources'), dst)
    else:
        # For OMF 6: the creation request is only sent once
        if not self.create_id:
            props = {}
            if self.get('command'):
                props['application:binary_path'] = self.get('command')
                props['application:hrn'] = self.get('command')
                props['application:membership'] = self._topic_app
            props['application:type'] = "application"

            self.create_id = os.urandom(16).encode('hex')
            self._omf_api.frcp_create(self.create_id,
                                      self.node.get('hostname'),
                                      "application", props=props)

        if self._create_cnt > confirmation_counter:
            msg = "Couldn't retrieve the confirmation of the creation"
            self.error(msg)
            raise RuntimeError(msg)

        uid = self.check_deploy(self.create_id)
        if not uid:
            # Not confirmed yet: count the attempt and retry later
            self._create_cnt += 1
            self.ec.schedule(reschedule_check, self.deploy)
            return

        self._topic_app = uid
        self._omf_api.enroll_topic(self._topic_app)

    super(OMFApplication, self).do_deploy()
def _process(self):
    """ Process scheduled tasks.

    .. note::

        Tasks are scheduled by invoking the schedule method with a target
        callback and an execution time. The schedule method creates a new
        Task object with that callback and execution time, and pushes it
        into the '_scheduler' queue.

        This method takes tasks from the '_scheduler' queue in a loop,
        for as long as '_stop' is not set, and hands them to a
        ParallelRun runner so that they are processed in parallel.

        The environmental variable NEPI_NTHREADS can be used to control
        the number of threads used to process tasks. When it is not set,
        the value already stored in '_nthreads' is kept.

        On each iteration the loop takes the next task from the
        '_scheduler' queue. If the execution time of that task is <= the
        current time, the task is pushed into the runner together with
        the '_execute' method for immediate execution. If the execution
        time is still in the future, the task is put back into the queue
        and the loop waits on the condition until the timeout expires or
        the condition is notified.

        Any exception raised inside the loop is logged, the EC is set to
        the FAILED state and the failure manager is notified.

    """
    self._nthreads = int(
        os.environ.get("NEPI_NTHREADS", str(self._nthreads)))

    self._runner = ParallelRun(maxthreads=self.nthreads)
    self._runner.start()

    while not self._stop:
        try:
            # The condition is held through 'with' so that it is always
            # released, even when the scheduler raises. A leaked lock
            # would block every other thread that uses this condition.
            with self._cond:
                task = self._scheduler.next()

                if not task:
                    # No task to execute. Wait for a new task to be
                    # scheduled.
                    self._cond.wait()
                else:
                    now = tnow()
                    if now < task.timestamp:
                        # The task timestamp is in the future.
                        # Calculate timeout in seconds
                        timeout = tdiffsec(task.timestamp, now)

                        # Re-schedule task with the same timestamp
                        self._scheduler.schedule(task)
                        task = None

                        # Wait timeout or until a new task awakes the
                        # condition
                        self._cond.wait(timeout)

            if task:
                # Process tasks in parallel (outside of the lock)
                self._runner.put(self._execute, task)
        except:
            # Deliberately broad: this is the top-level boundary of the
            # processing loop and every failure must flag the EC.
            import traceback
            err = traceback.format_exc()
            self.logger.error(
                "Error while processing tasks in the EC: %s" % err)

            # Set the EC to FAILED state
            self._state = ECState.FAILED

            # Set the FailureManager failure level to EC failure
            self._fm.set_ec_failure()

    self.logger.debug("Exiting the task processing loop ... ")

    self._runner.sync()
    self._runner.destroy()