def recv_cfg(self, msg):
    """receive cfg/topology configuration"""
    if msg.type != ConfigurationMessage.ident:
        raise MessageProcessingError('unexpected message: %s' % msg)

    self.logger.debug('got channel configuration')

    # gw node name
    hostname = _getshorthostname()
    if not msg.gateway:
        self.nodename = hostname
        self.logger.warn('gw name not provided, using system hostname %s',
                         self.nodename)
    else:
        self.nodename = msg.gateway
        self.logger.debug('using gateway node name %s', self.nodename)
        if self.nodename.lower() != hostname.lower():
            self.logger.debug('gw name %s does not match system hostname %s',
                              self.nodename, hostname)

    # topology
    task_self().topology = self.topology = msg.data_decode()
    self.logger.debug('decoded propagation tree')
    self.logger.debug('\n%s', self.topology)
    self.setup = True
    self._ack(msg)

def test_rank_placeholder(self):
    """test ExecWorker with several nodes and %n (rank)"""
    nodes = "localhost,%s" % HOSTNAME
    self.execw(nodes=nodes, handler=None, command="echo %n")
    self.assertEqual(task_self().max_retcode(), 0)
    self.assertEqual([str(msg) for msg, _ in task_self().iter_buffers()],
                     ['0', '1'])

def check_etat_noeud2(etatnoeud_IHM, nodeset):
    etatnoeud_IHM.listWidget.clear()
    etatnoeud_IHM.listWidget_2.clear()
    etatnoeud_IHM.listWidget_3.clear()
    i = 1
    print nodeset
    for node in nodeset:
        cli = "echo Hello"
        taske = task_self()
        taske.shell(cli, nodes=node)
        taske.run()
        for output, nodelist in task_self().iter_buffers():
            if (output == "Hello"):
                etatnoeud_IHM.listWidget.insertItem(
                    i, "%s" % (NodeSet.fromlist(nodelist)))
                i = i + 1
            else:
                etatnoeud_IHM.listWidget_2.insertItem(
                    i, "%s" % (NodeSet.fromlist(nodelist)))
                i = i + 1
            etatnoeud_IHM.sortie.append(output)
            print "output: %s" % output

def test_004_workerclass(self):
    """test Defaults workerclass"""
    self.defaults.distant_workername = 'pdsh'
    task_terminate()
    task = task_self(self.defaults)
    self.assertTrue(task.default("distant_worker") is WorkerPdsh)
    self.defaults.distant_workername = 'ssh'
    self.assertTrue(task.default("distant_worker") is WorkerPdsh)
    task_terminate()
    task = task_self(self.defaults)
    self.assertTrue(task.default("distant_worker") is WorkerSsh)
    task_terminate()

    dname = make_temp_dir()
    modfile = open(os.path.join(dname, 'OutOfTree.py'), 'w')
    modfile.write(dedent("""
        class OutOfTreeWorker(object):
            pass
        WORKER_CLASS = OutOfTreeWorker"""))
    modfile.flush()
    modfile.close()
    sys.path.append(dname)
    self.defaults.distant_workername = 'OutOfTree'
    task = task_self(self.defaults)
    # compare names with ==, not identity ('is') against a string literal
    self.assertEqual(task.default("distant_worker").__name__,
                     'OutOfTreeWorker')
    task_terminate()
    shutil.rmtree(dname, ignore_errors=True)

def test_node_placeholder(self):
    """test ExecWorker with several nodes and %h (host)"""
    nodes = "localhost,%s" % HOSTNAME
    self.execw(nodes=nodes, handler=None, command="echo %h")
    self.assertEqual(task_self().max_retcode(), 0)
    self.assertEqual(task_self().node_buffer('localhost'), 'localhost')
    self.assertEqual(task_self().node_buffer(HOSTNAME), HOSTNAME)

def clush_excepthook(extype, exp, traceback):
    """Exceptions hook for clush: this method centralizes exception
    handling from main thread and from (possible) separate task thread.

    This hook has to be previously installed on startup by overriding
    sys.excepthook and task.excepthook."""
    try:
        raise exp
    except ClushConfigError as econf:
        print >> sys.stderr, "ERROR: %s" % econf
        clush_exit(1)
    except KeyboardInterrupt as kbe:
        uncomp_nodes = getattr(kbe, 'uncompleted_nodes', None)
        if uncomp_nodes:
            print >> sys.stderr, \
                "Keyboard interrupt (%s did not complete)." % uncomp_nodes
        else:
            print >> sys.stderr, "Keyboard interrupt."
        clush_exit(128 + signal.SIGINT)
    except OSError as exp:
        print >> sys.stderr, "ERROR: %s" % exp
        if exp.errno == errno.EMFILE:
            print >> sys.stderr, "ERROR: current `nofile' limits: " \
                "soft=%d hard=%d" % resource.getrlimit(resource.RLIMIT_NOFILE)
        clush_exit(1)
    except GENERIC_ERRORS as exc:
        clush_exit(handle_generic_error(exc))

    # Error not handled
    task_self().default_excepthook(extype, exp, traceback)

def test_one_action_error(self):
    """A group with only an error action is on error"""
    act = TestAction('/bin/false')
    self.grp.add(act)
    self.grp.launch()
    task_self().run()
    self.assertEqual(act.status(), ACT_ERROR)
    self.assertEqual(self.grp.status(), ACT_ERROR)

def test_rcopy(self):
    """test file rcopy setup in tree mode (1 gateway)"""
    self.test_ok = False
    self.tfile = make_temp_file("dummy-src")
    self.tdir = make_temp_dir()
    task_self().rcopy(self.tfile.name, self.tdir, "n60")
    task_self().resume()
    self.assertTrue(self.test_ok)

def test_one_action(self):
    """A group and its only action both run ok"""
    act = TestAction('/bin/true')
    self.grp.add(act)
    self.grp.launch()
    task_self().run()
    self.assertEqual(act.status(), ACT_OK)
    self.assertEqual(self.grp.status(), ACT_OK)

def test_003_io_pipes(self):
    """test StreamWorker bound to pipe readers and writers"""
    # os.write -> pipe1 -> worker -> pipe2 -> os.read

    class TestH(EventHandler):
        def __init__(self, testcase):
            self.testcase = testcase
            self.worker = None
            self.pickup_count = 0
            self.hup_count = 0

        def ev_pickup(self, worker):
            self.pickup_count += 1

        def ev_read(self, worker):
            self.testcase.assertEqual(worker.current_sname, "pipe1")
            worker.write(worker.current_msg, "pipe2")

        def ev_timer(self, timer):
            # call set_write_eof on specific stream after some delay
            worker = self.worker
            self.worker = 'DONE'
            worker.set_write_eof("pipe2")

        def ev_hup(self, worker):
            # ev_hup called at the end (after set_write_eof is called)
            self.hup_count += 1
            self.testcase.assertEqual(self.worker, 'DONE')
            # no rc code should be set
            self.testcase.assertEqual(worker.current_rc, None)

    # create a StreamWorker instance bound to several pipes
    hdlr = TestH(self)
    worker = StreamWorker(handler=hdlr)
    hdlr.worker = worker

    rfd1, wfd1 = os.pipe()
    worker.set_reader("pipe1", rfd1)
    os.write(wfd1, b"Some data\n")
    os.close(wfd1)

    rfd2, wfd2 = os.pipe()
    worker.set_writer("pipe2", wfd2)

    timer1 = task_self().timer(1.0, handler=hdlr)

    self.run_worker(worker)
    self.assertEqual(os.read(rfd2, 1024), b"Some data")
    os.close(rfd2)

    # wfd2 should be closed by CS
    self.assertRaises(OSError, os.close, wfd2)
    # rfd1 should be closed by CS
    self.assertRaises(OSError, os.close, rfd1)

    # check pickup/hup
    self.assertEqual(hdlr.hup_count, 1)
    self.assertEqual(hdlr.pickup_count, 1)
    self.assertTrue(task_self().max_retcode() is None)

def test_copy(self):
    """test file copy setup in tree mode (1 gateway)"""
    self.test_ok = False
    self.tfile = make_temp_file("dummy")
    # add leading '/' like clush so that WorkerTree knows it's a dir
    task_self().copy(self.tfile.name,
                     join(dirname(self.tfile.name), ''),
                     "n60")
    task_self().resume()
    self.assertTrue(self.test_ok)

def _already_done(self):
    """Return a Result object if the target is already unmounted."""
    if self.comp.is_stopped():
        return Result(message="%s is already stopped" % self.comp.label)

    # LBUG #18624
    if not self.comp.dev_isblk:
        task_self().set_info("fanout", 1)

    return None

def _already_done(self):
    """Raise an exception if the target is mounted."""
    self.comp.raise_if_started("Cannot %s" % self.NAME)

    # LBUG #18624: workaround for "multiple mkfs.lustre on loop devices"
    if not self.comp.dev_isblk:
        # configure one engine client max per task (sequential, bah.)
        task_self().set_info("fanout", 1)

    return None

def test_two_actions_2_errors(self):
    """A group with 2 error actions is on error"""
    act1 = TestAction('/bin/false')
    act2 = TestAction('/bin/false')
    self.grp.add(act1)
    self.grp.add(act2)
    self.grp.launch()
    task_self().run()
    self.assertEqual(act1.status(), ACT_ERROR)
    self.assertEqual(act2.status(), ACT_ERROR)
    self.assertEqual(self.grp.status(), ACT_ERROR)

def test_two_actions_ok_and_error(self):
    """A group with an ok action and an error action is on error"""
    act1 = TestAction('/bin/true')
    act2 = TestAction('/bin/false')
    self.grp.add(act1)
    self.grp.add(act2)
    self.grp.launch()
    task_self().run()
    self.assertEqual(act1.status(), ACT_OK)
    self.assertEqual(act2.status(), ACT_ERROR)
    self.assertEqual(self.grp.status(), ACT_ERROR)

def stop(self, args):
    nodes = NodeSet()
    nbNoeud = len(args) - 2
    # print 'nbNoeud: %d' % nbNoeud
    for i in range(1, nbNoeud + 1):
        nodes.add(args[i])
        print args[i] + ' : sudo service ' + args[nbNoeud + 1] + ' stop'
    task_self().run('sudo service ' + args[nbNoeud + 1] + ' stop',
                    nodes=nodes)

def test_two_actions_ok_and_ok(self):
    """A group with 2 actions all run fine"""
    act1 = TestAction('/bin/true')
    act2 = TestAction('/bin/true')
    self.grp.add(act1)
    self.grp.add(act2)
    self.grp.launch()
    task_self().run()
    self.assertEqual(act1.status(), ACT_OK)
    self.assertEqual(act2.status(), ACT_OK)
    self.assertEqual(self.grp.status(), ACT_OK)

def test_abort_on_read(self):
    """test ExecWorker.abort() on read"""
    class TestH(EventHandler):
        def ev_read(self, worker):
            worker.abort()
            worker.abort()  # safe but no effect

    self.execw(nodes='localhost', handler=TestH(),
               command="echo ok; tail -f /dev/null")
    self.assertEqual(task_self().max_retcode(), None)
    self.assertEqual(task_self().node_buffer('localhost'), b'ok')

def test_abort_on_close(self):
    """test ExecWorker.abort() on close"""
    class TestH(EventHandler):
        def ev_close(self, worker, timedout):
            worker.abort()
            worker.abort()  # safe but no effect

    self.execw(nodes='localhost', handler=TestH(),
               command="echo ok; sleep .1")
    self.assertEqual(task_self().max_retcode(), 0)
    self.assertEqual(task_self().node_buffer('localhost'), b'ok')

def test_copy_wrong_directory(self):
    """test copying wrong directory with an ExecWorker"""
    srcdir = make_temp_dir()
    dst = make_temp_file("data")
    ref1 = make_temp_file("data1", dir=srcdir)
    try:
        self.execw(nodes="localhost", handler=None, source=srcdir,
                   dest=dst.name, stderr=True)
        self.assertEqual(task_self().max_retcode(), 1)
        self.assertTrue(len(task_self().node_error("localhost")) > 0)
        self.assertTrue(os.path.isfile(ref1.name))
    finally:
        del ref1
        os.rmdir(srcdir)

def test_004_workerclass(self):
    """test Defaults workerclass"""
    self.defaults.distant_workername = 'pdsh'
    task_terminate()
    task = task_self(self.defaults)
    self.assertTrue(task.default("distant_worker") is WorkerPdsh)
    self.defaults.distant_workername = 'ssh'
    self.assertTrue(task.default("distant_worker") is WorkerPdsh)
    task_terminate()
    task = task_self(self.defaults)
    self.assertTrue(task.default("distant_worker") is WorkerSsh)
    task_terminate()

def test_copy_wrong_directory(self):
    """test copying wrong directory with an ExecWorker"""
    srcdir = make_temp_dir()
    dst = make_temp_file(b"data")
    ref1 = make_temp_file(b"data1", dir=srcdir)
    try:
        self.execw(nodes='localhost', handler=None, source=srcdir,
                   dest=dst.name, stderr=True)
        self.assertEqual(task_self().max_retcode(), 1)
        self.assertTrue(len(task_self().node_error("localhost")) > 0)
        self.assertTrue(os.path.isfile(ref1.name))
    finally:
        del ref1
        os.rmdir(srcdir)

def test_one_dep_error(self):
    """A group with an error dependency is on error, content is not run"""
    act = TestAction('/bin/false')
    act1 = TestAction('/bin/true')
    self.grp.add(act1)
    self.grp.depends_on(act)
    self.grp.launch()
    task_self().run()
    self.assertEqual(act.status(), ACT_ERROR)
    self.assertEqual(act1.status(), ACT_WAITING)
    self.assertEqual(self.grp.status(), ACT_ERROR)

def test_one_dep_ok(self):
    """A group with one ok dependency is ok"""
    act = TestAction('/bin/true')
    act1 = TestAction('/bin/true')
    self.grp.add(act1)
    self.grp.depends_on(act)
    self.grp.launch()
    task_self().run()
    self.assertEqual(act.status(), ACT_OK)
    self.assertEqual(act1.status(), ACT_OK)
    self.assertEqual(self.grp.status(), ACT_OK)

def status(self, args, afficher):
    node0 = NodeSet()
    nbNoeud = len(args) - 2
    # print 'nbNoeud: %d' % nbNoeud
    for i in range(1, nbNoeud + 1):
        node0.add(args[i])
    print 'sudo service ' + args[nbNoeud + 1] + ' status'
    task_self().run('sudo service ' + args[nbNoeud + 1] + ' status',
                    nodes=node0)
    return self.recevoir(afficher)

def test_one_dep_after_error(self):
    """An action with an error group dependency is on error"""
    act = TestAction('/bin/true')
    self.grp = ActionGroup()
    act1 = TestAction('/bin/false')
    self.grp.add(act1)
    act.depends_on(self.grp)
    act.launch()
    task_self().run()
    self.assertEqual(act1.status(), ACT_ERROR)
    self.assertEqual(self.grp.status(), ACT_ERROR)
    self.assertEqual(act.status(), ACT_ERROR)

def testExecuteTasksOnNeighbors(self):
    """test execute tasks on directly connected machines"""
    tmpfile = tempfile.NamedTemporaryFile()
    myhost = my_node()
    cfgparser = load_cfg('topology1.conf')
    neighbor = cfgparser.get('CONFIG', 'NEIGHBOR')
    gateways = cfgparser.get('CONFIG', 'GATEWAYS')
    targets = cfgparser.get('CONFIG', 'TARGETS')

    tmpfile.write('[Main]\n')
    tmpfile.write('%s: %s\n' % (myhost, neighbor))
    tmpfile.write('%s: %s\n' % (neighbor, gateways))
    tmpfile.write('%s: %s\n' % (gateways, targets))
    tmpfile.flush()

    parser = TopologyParser()
    parser.load(tmpfile.name)
    tree = parser.tree(myhost)

    wtree = WorkerTree(NodeSet(targets), None, 0, command='echo ok',
                       topology=tree, newroot=myhost)
    # XXX Need to propagate topology for this to work in tests
    raise RuntimeError
    task = task_self()
    task.set_info('debug', True)
    task.schedule(wtree)
    task.resume()

    for buf, nodes in task.iter_buffers():
        print '-' * 15
        print str(nodes)
        print '-' * 15
        print buf
        print ''

def testTaskScheduleTwice(self):
    """test task worker schedule twice error"""
    task = task_self()
    self.assert_(task != None)
    worker = task.shell("/bin/echo itsme")
    self.assertRaises(WorkerError, task.schedule, worker)
    task.abort()

def testInvalidCommunication(self):
    """test detecting invalid data upon reception"""
    ftest = tempfile.NamedTemporaryFile()
    ftest.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    ftest.write('This is an invalid line\n')
    ftest.write('<channel>\n')
    ftest.write('</channel>\n')
    ## write data on the disk
    # actually we should do more but this seems sufficient
    ftest.flush()

    chan = _TestingChannel()
    task = task_self()

    fin = open(ftest.name)
    fout = open('/dev/null', 'w')
    worker = WorkerSimple(fin, fout, None, None, handler=chan)

    task.schedule(worker)
    self.assertRaises(MessageProcessingError, task.resume)
    fin.close()
    fout.close()
    ftest.close()

def testDistantChannel(self):
    """schizophrenic self communication test over SSH"""
    # create a bunch of messages
    spec = {
        # msg type: number of samples
        ConfigurationMessage.ident: 1,
        ControlMessage.ident: 1,
        ACKMessage.ident: 1,
        ErrorMessage.ident: 1
    }
    ftest = tempfile.NamedTemporaryFile()
    ftest.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    ftest.write('<channel>\n')
    for mtype, count in spec.items():
        for i in range(count):
            sample = gen_map[mtype]()
            sample.msgid = i
            ftest.write(sample.xml() + '\n')
    ftest.write('</channel>\n')
    ## write data on the disk
    # actually we should do more but this seems sufficient
    ftest.flush()

    task = task_self()
    chan = _TestingChannel()
    task.shell('cat ' + ftest.name, nodes='localhost', handler=chan)
    task.resume()
    ftest.close()
    self.assertEquals(chan.validate(spec), True)

def testWorkerNotScheduledDistant(self):
    """test library misusage (distant worker not scheduled)"""
    task = task_self()
    worker = WorkerSsh(HOSTNAME, command="/bin/hostname", handler=None,
                       timeout=0)
    self.assert_(worker != None)
    task.resume()
    self.assertRaises(WorkerError, worker.node_buffer, HOSTNAME)

def main():
    """ Main script function """
    # Initialize option parser
    parser = optparse.OptionParser()
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False, help="Enable debug mode")
    parser.add_option("-n", "--nodes", action="store", dest="nodes",
                      default="@all", help="Target nodes (default @all group)")
    parser.add_option("-f", "--fanout", action="store", dest="fanout",
                      default="128", help="Fanout window size (default 128)",
                      type=int)
    parser.add_option("-t", "--timeout", action="store", dest="timeout",
                      default="5", help="Timeout in seconds (default 5)",
                      type=float)
    options, _ = parser.parse_args()

    # Get current task (associated to main thread)
    task = task_self()

    nodes_target = NodeSet(options.nodes)
    task.set_info("fanout", options.fanout)

    if options.debug:
        print "nodeset : %s" % nodes_target
        task.set_info("debug", True)

    # Create ClusterShell event handler
    handler = CheckNodesHandler(CheckNodesResult())

    # Schedule remote command and run task (blocking call)
    task.run("who -b", nodes=nodes_target, handler=handler,
             timeout=options.timeout)

def get_remote_output(host_list, command, timeout=120):
    """Run the command on each specified host in parallel.

    Args:
        host_list (list): list of hosts
        command (str): command to run on each host
        timeout (int, optional): number of seconds to wait for all jobs to
            complete. Defaults to 120 seconds.

    Returns:
        Task: a Task object containing the result of running the command
            on the specified hosts

    """
    # Create a ClusterShell Task to run the command in parallel on the hosts
    if isinstance(host_list, list):
        nodes = NodeSet.fromlist(host_list)
    else:
        nodes = NodeSet(host_list)
    task = task_self()
    # task.set_info('debug', True)
    # Enable forwarding of the ssh authentication agent connection
    task.set_info("ssh_options", "-oForwardAgent=yes")
    print("Running on {}: {}".format(nodes, command))
    task.run(command=command, nodes=nodes, timeout=timeout)
    return task

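# Usage sketch (not from the source): one way a caller could consume the Task
# returned by get_remote_output, assuming the standard ClusterShell Task API
# (iter_buffers and iter_keys_timeout). report_remote_output is a hypothetical
# helper name introduced here for illustration only.
def report_remote_output(host_list, command):
    """Print grouped stdout per node set, plus any nodes that timed out."""
    task = get_remote_output(host_list, command, timeout=60)
    # iter_buffers() yields (buffer, nodelist) pairs, grouping identical output
    for buf, nodelist in task.iter_buffers():
        print("{}: {}".format(NodeSet.fromlist(nodelist), str(buf)))
    # iter_keys_timeout() yields the keys (nodes) whose command timed out
    timed_out = NodeSet.fromlist(task.iter_keys_timeout())
    if timed_out:
        print("timeout on {}".format(timed_out))
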
def command(nodeset, command):
    output = {}
    task = task_self()

    vlog(4, 'clush_ipmi: nodeset:%s command:%s' % (nodeset, command))

    if not sgi_cluster.is_sac():
        vlog(1, "only run this from SAC node")
        return False

    for node in nodeset:
        lead = sgi_cluster.get_lead(node)
        if lead:
            if lead == socket.gethostname():
                cmd = '/usr/diags/bin/bcmd -H {0} {1}'.format(
                    sgi_cluster.get_bmc(node), command)
                vlog(4, 'calling bcmd on localhost: %s' % cmd)
                task.shell(cmd, timeout=120,
                           handler=__OutputHandler(node, output))
            else:
                cmd = '/usr/diags/bin/bcmd -H {0} {1}'.format(
                    sgi_cluster.get_bmc(node), command)
                vlog(4, 'calling bcmd on %s: %s' % (lead, cmd))
                task.shell(cmd, nodes=lead, timeout=120,
                           handler=__OutputHandler(node, output))

    task.run()
    return output

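# Hypothetical sketch (the real __OutputHandler is not shown in the source):
# a per-node EventHandler that could collect command output into the shared
# dict passed by command(), using the worker.current_msg attribute already
# relied on elsewhere in this code. The class name is an assumption.
from ClusterShell.Event import EventHandler

class _OutputHandlerSketch(EventHandler):
    def __init__(self, node, output):
        EventHandler.__init__(self)
        self.node = node      # logical node the BMC command targets
        self.output = output  # shared dict: node -> list of output lines

    def ev_read(self, worker):
        # append each line of command output under the target node
        self.output.setdefault(self.node, []).append(worker.current_msg)
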
def testDistantChannel(self):
    """schizophrenic self communication test over SSH"""
    # create a bunch of messages
    spec = {
        # msg type: number of samples
        ConfigurationMessage.ident: 1,
        ControlMessage.ident: 1,
        ACKMessage.ident: 1,
        ErrorMessage.ident: 1
    }
    ftest = tempfile.NamedTemporaryFile()
    ftest.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    ftest.write('<channel>\n')
    for mtype, count in spec.iteritems():
        for i in xrange(count):
            sample = gen_map[mtype]()
            sample.msgid = i
            ftest.write(sample.xml() + '\n')
    ftest.write('</channel>\n')
    ## write data on the disk
    # actually we should do more but this seems sufficient
    ftest.flush()

    task = task_self()
    chan = _TestingChannel()
    task.shell('cat ' + ftest.name, nodes='localhost', handler=chan)
    task.resume()
    ftest.close()
    self.assertEquals(chan.validate(spec), True)

def __init__(self, task=task_self()):
    EventHandler.__init__(self)
    self.task = task
    # Action duration
    self.start = None
    self.duration = None

def recv_ctl(self, msg):
    """receive control message with actions to perform"""
    if msg.type == ControlMessage.ident:
        self.logger.debug('GatewayChannel._state_ctl')
        if msg.action == 'shell':
            data = msg.data_decode()
            cmd = data['cmd']
            stderr = data['stderr']
            timeout = data['timeout']
            remote = data['remote']

            #self.propagation.invoke_gateway = data['invoke_gateway']
            self.logger.debug('decoded gw invoke (%s)', data['invoke_gateway'])

            taskinfo = data['taskinfo']
            self.logger.debug('assigning task infos (%s)', data['taskinfo'])

            task = task_self()
            task._info.update(taskinfo)
            task.set_info('print_debug', _gw_print_debug)

            if task.info('debug'):
                self.logger.setLevel(logging.DEBUG)

            self.logger.debug('inherited fanout value=%d',
                              task.info("fanout"))

            self.logger.debug('launching execution/enter gathering state')

            responder = TreeWorkerResponder(task, self, msg.srcid)

            self.propagation = TreeWorker(msg.target, responder, timeout,
                                          command=cmd,
                                          topology=self.topology,
                                          newroot=self.nodename,
                                          stderr=stderr,
                                          remote=remote)
            # FIXME ev_start-not-called workaround
            responder.worker = self.propagation

            self.propagation.upchannel = self
            task.schedule(self.propagation)
            self.logger.debug("TreeWorker scheduled")
            self._ack(msg)
        elif msg.action == 'write':
            data = msg.data_decode()
            self.logger.debug('GatewayChannel write: %d bytes',
                              len(data['buf']))
            self.propagation.write(data['buf'])
            self._ack(msg)
        elif msg.action == 'eof':
            self.logger.debug('GatewayChannel eof')
            self.propagation.set_write_eof()
            self._ack(msg)
        else:
            self.logger.error('unexpected CTL action: %s', msg.action)
    else:
        self.logger.error('unexpected message: %s', str(msg))

def test_perform_remote_false_action(self):
    """Test performing an action with remote=False"""
    action = Action('start', command='hostname -s', target='node1')
    action.remote = False
    svc = Service('Local')
    svc.add_action(action)

    mytopo = tempfile.NamedTemporaryFile('w')
    mytopo.write(u"[routes]\n%s: node1\n" % HOSTNAME)
    mytopo.flush()
    task_self().load_topology(mytopo.name)
    mytopo.close()

    svc.run('start')
    buff = action.worker.node_buffer('node1')
    self.assertEqual(buff, HOSTNAME)

def __init__(self):
    task = task_self()
    task.set_info("print_debug", print_csdebug)
    fanout = Globals().get_ssh_fanout()
    if fanout > 0:
        task.set_info("fanout", fanout)