def test_local_string(self):
    with open("tests/script-with-args.sh") as reader:
        my_script = reader.read()
    self.run_one_job(
        SshJob(node=self.gateway(),
               command=RunString(my_script, "foo", "bar", "tutu"),
               label="test_local_string"))
def test_x11_shell(self):
    self.run_one_job(
        job=SshJob(node=self.gateway(),
                   command=[
                       Run("echo DISPLAY=$DISPLAY", x11=True),
                       RunString("""#!/bin/bash
xlsfonts | head -5
""", x11=True),
                   ]))
def test_local_string_includes(self):
    with open("tests/needsinclude.sh") as reader:
        my_script = reader.read()
    self.run_one_job(
        SshJob(node=self.gateway(),
               command=RunString(my_script, "some", "'more text'",
                                 remote_name="run-script-sample.sh",
                                 includes=["tests/inclusion.sh"]),
               label="test_local_string_includes"))
def _run_xterm_node_shell(self, node, shell):
    if shell:
        xterm_command = RunString("""#!/bin/bash
xterm
""", x11=True)
    else:
        xterm_command = Run("xterm", x11=True)
    self.run_one_job(
        job=SshJob(node=node,
                   command=[
                       Run("echo without x11, DISPLAY=$DISPLAY"),
                       Run("echo with x11, DISPLAY=$DISPLAY", x11=True),
                       xterm_command,
                   ]))
def test_commands_verbose(self):
    dummy_path = "tests/dummy-10"
    dummy_file = Path(dummy_path).name
    scheduler = Scheduler()
    Sequence(
        SshJob(
            node=self.gateway(),
            verbose=True,
            commands=[
                Run("hostname"),
                RunScript("tests/script-with-args.sh", "arg1", "arg2"),
                RunString("for i in $(seq 3); do echo verbose$i; done"),
                Push(localpaths=dummy_path, remotepath="."),
                Pull(remotepaths=dummy_file, localpath=dummy_path + ".loop"),
            ]),
        SshJob(node=LocalNode(),
               critical=True,
               commands=Run("diff {x} {x}.loop".format(x=dummy_path),
                            verbose=True)),
        scheduler=scheduler)
    ok = scheduler.run()
    ok or scheduler.debrief()
    self.assertTrue(ok)
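# ----------------------------------------------------------------------
# Editor's recap (a sketch, not part of the test suite): the tests above
# exercise every command class an SshJob accepts. Their call shapes, as
# used above, boil down to the following; the node, hostname and file
# names are placeholders, and the imports assume the usual top-level
# apssh / asynciojobs layout.
#
#   from apssh import SshNode, SshJob, Run, RunScript, RunString, Push, Pull
#   from asynciojobs import Scheduler
#
#   node = SshNode(hostname="example.org", username="root")
#   job = SshJob(node=node, commands=[
#       Run("hostname"),                                   # plain remote command
#       RunScript("tests/script-with-args.sh", "a", "b"),  # push a local script, then run it
#       RunString("echo inline $1", "arg1"),               # same, but from an in-memory string
#       Push(localpaths="tests/dummy-10", remotepath="."), # copy local -> remote
#       Pull(remotepaths="dummy-10", localpath="."),       # copy remote -> local
#   ])
#   Scheduler(job).run()
# ----------------------------------------------------------------------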
ip link set $ifname up
# set to ad-hoc mode
iw dev $ifname ibss join $netname $freq
ip address add $ipaddr_mask dev $ifname
"""

##########
# setting up the wireless interface on both fit01 and fit02
init_node_01 = SshJob(
    node=node1,
    command=RunString(
        # first argument is a string containing
        # the script to be run remotely
        turn_on_wireless_script,
        # and now its arguments
        wireless_driver, wireless_interface,
        "10.0.0.1/24", "foobar", 2412,
        # verbose=True,
    ),
    required=check_lease,
    scheduler=scheduler,
)
init_node_02 = SshJob(
    node=node2,
    command=RunString(
        turn_on_wireless_script,
        wireless_driver, wireless_interface,
        "10.0.0.2/24",
def test_graphics1(self):
    scheduler = Scheduler(critical=False)
    gateway = SshNode(hostname=localhostname(), username=localuser())
    Sequence(
        SshJob(
            node=gateway,
            command='hostname',
        ),
        SshJob(node=gateway,
               command=[
                   Run('ls /etc/passwd'),
                   Run('wc -l /etc/passwd'),
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo with RunString on $(hostname) at $(date)"),
               ]),
        SshJob(node=gateway,
               commands=[
                   RunScript("tests/testbasic.sh"),
               ]),
        SshJob(node=gateway,
               commands=[
                   Run('wc -l /etc/passwd'),
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo with RunString on $(hostname) at $(date)",
                       remote_name="show-host-date"),
                   RunScript("tests/testbasic.sh"),
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n",
                       10)
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n",
                       10,
                       remote_name='short-show-args')
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n"
                       "echo second arg is $2\n"
                       "echo third arg is $3\n"
                       "echo fourth arg is $4\n",
                       100, 200, 300, 400)
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n"
                       "echo second arg is $2\n"
                       "echo third arg is $3\n"
                       "echo fourth arg is $4\n",
                       1000, 2000, 3000, 4000,
                       remote_name='long-show-args')
               ]),
        SshJob(node=gateway,
               commands=[
                   RunString(
                       "#!/usr/bin/env bash\n"
                       "echo first arg is $1\n"
                       "echo second arg is $2\n"
                       "echo third arg is $3\n"
                       "echo fourth arg is $4\n",
                       1000, 2000, 3000, 4000,
                       remote_name='long-show-args',
                       label='snip')
               ]),
        SshJob(node=gateway,
               commands=[
                   Run("hostname", label="Run()"),
                   RunScript("foobar", label="RunScript()"),
                   RunString("foobar", label="RunString()"),
                   Push("foobar", remotepath="remote", label="Push()"),
                   Pull("remote", localpath="foobar", label="Pull()"),
                   Run("hostname", label=None),
                   RunScript("foobar", label=[]),
                   RunString("foobar", label=0),
                   Push("foobar", remotepath="remote", label={}),
                   Pull("remote", localpath="foobar", label=""),
               ]),
        scheduler=scheduler,
    )
    print("NO DETAILS")
    scheduler.list()
    print("WITH DETAILS")
    scheduler.list(details=True)
    produce_png(scheduler, "test_graphics1")
    ok = scheduler.run()
    self.assertFalse(ok)
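# produce_png() is a helper defined elsewhere in this test suite; a plausible
# minimal reconstruction (an assumption, not the actual helper) would lean on
# asynciojobs' graphviz export -- the exact output path is a guess:
def produce_png(scheduler, name):
    # render the scheduler's dependency graph as a png file;
    # requires graphviz to be installed on the machine running the tests
    scheduler.export_as_pngfile("tests/" + name)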
net_intf = SshJob(node=node,
                  command=[
                      Run('mkdir', '/root/captures', '/root/errors'),
                      Run('ifconfig', 'data', ip_address, 'up'),
                  ],
                  required=load_images,
                  scheduler=scheduler)
net_intfs.append(net_intf)

# -------------
# Installing Distrinet in the client/master node
install_script = read_from_file('install_script.sh')

install = SshJob(node=nodes[master],
                 command=RunString(install_script),
                 required=tuple(net_intfs),
                 scheduler=scheduler)

# -------------
# Loading image tarballs
load_tarballs = SshJob(node=faraday,
                       command=Run('scp', '-o StrictHostKeyChecking=no',
                                   '~/VoD/*.tar.gz', 'root@fit01:'),
                       required=install,
                       scheduler=scheduler)

# -------------
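# Note: the tarballs already sit on faraday, which is why this step runs a
# plain 'scp' on the gateway; apssh's Push only copies files from the machine
# that drives the scenario. If the tarballs were local to that machine, the
# same step could be sketched as below ('fit01_node' is a hypothetical SshNode
# reaching fit01 through faraday, not defined in this script):
#
#   import glob
#   load_tarballs = SshJob(node=fit01_node,
#                          commands=[Push(localpaths=glob.glob("VoD/*.tar.gz"),
#                                         remotepath=".")],
#                          required=install,
#                          scheduler=scheduler)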
sudo ifconfig tap0 hw ether 08:00:2e:00:00:01
sudo ifconfig tap0 10.1.2.1 netmask 255.255.255.0 up
sed -i 's/geteuid/getppid/' /usr/bin/vlc
ifconfig data promisc up
"""

# following two inits should be done only when load_images is true
if args.load_images:
    init_server = SshJob(
        node=server,
        scheduler=scheduler,
        required=green_light,
        commands=[
            Run("turn-on-data"),
            RunString(server_init_script, label="init server node"),
        ],
    )
    init_client = SshJob(
        node=client,
        scheduler=scheduler,
        required=green_light,
        commands=[
            Run("turn-on-data"),
            RunString(client_init_script, label="init client node"),
        ],
    )

if args.load_images:
    init_done = (init_server, init_client)
def run(self, verbose, no_load, no_save):
    """
    can skip the load or save phases
    """
    print("Using node {} through gateway {}".format(self.node, self.gateway))
    print("In order to produce {} from {}".format(self.to_image, self.from_image))
    print("The following scripts will be run:")
    for i, script in enumerate(self.scripts, 1):
        print("{:03d}:{}".format(i, " ".join(script)))
    items = []
    if no_load:
        items.append("skip load")
    if no_save:
        items.append("skip save")
    if items:
        print("WARNING: using fast-track mode {}".format(' & '.join(items)))
    self.locate_companion_shell()
    if verbose:
        print("Located companion in {}".format(self.companion))
    if verbose:
        print("Preparing tar of input shell scripts .. ", end="")
    tarfile = self.prepare_tar(self.to_image)
    if verbose:
        print("Done in {}".format(tarfile))
    keys = load_agent_keys()
    if verbose:
        print("We have found {} keys in the ssh agent".format(len(keys)))

    #################### the 2 nodes we need to talk to
    gwuser, gwname = self.user_host(self.gateway)
    gateway_proxy = None if not gwuser else SshNode(
        hostname=gwname,
        username=gwuser,
        keys=keys,
        formatter=ColonFormatter(verbose=verbose),
    )

    # really not sure it makes sense to use username other than root
    username, nodename = self.user_host(self.node)
    node_proxy = SshNode(
        gateway=gateway_proxy,
        hostname=nodename,
        username=username,
        keys=keys,
        formatter=ColonFormatter(verbose=verbose),
    )

    banner = 20 * '='

    # now that node_proxy is initialized, we need to
    # have a valid gateway_proxy for when we run all this from inside
    # the gateway
    if gateway_proxy is None:
        print("WARNING: build-image is designed to be run on your laptop")
        # best-effort, not even tested....
        gateway_proxy = LocalNode()

    #################### the little pieces
    sequence = Sequence(
        PrintJob("Checking for a valid lease"),
        # bail out if we don't have a valid lease
        SshJob(node=gateway_proxy,
               command="rhubarbe leases --check",
               critical=True),
        PrintJob("loading image {}".format(self.from_image)
                 if not no_load else "fast-track: skipping image load",
                 banner=banner,
                 # label="welcome message",
                 ),
        SshJob(
            node=gateway_proxy,
            commands=[
                Run("rhubarbe", "load", "-i", self.from_image, nodename)
                if not no_load else None,
                Run("rhubarbe", "wait", "-v", "-t", "240", nodename),
            ],
            # label="load and wait image {}".format(self.from_image),
        ),
        SshJob(
            node=node_proxy,
            commands=[
                Run("rm", "-rf", "/etc/rhubarbe-history/{}".format(self.to_image)),
                Run("mkdir", "-p", "/etc/rhubarbe-history"),
                Push(localpaths=tarfile, remotepath="/etc/rhubarbe-history"),
                RunScript(self.companion, nodename, self.from_image, self.to_image),
                Pull(localpath="{}/logs/".format(self.to_image),
                     remotepaths="/etc/rhubarbe-history/{}/logs/".format(self.to_image),
                     recurse=True),
            ],
            label="set up and run scripts in /etc/rhubarbe-history/{}".format(self.to_image)),
    )

    # avoid creating an SshJob with void commands
    if self.extra_logs:
        sequence.append(
            SshJob(
                node=node_proxy,
                label="collecting extra logs",
                critical=False,
                commands=[
                    Pull(localpath="{}/logs/".format(self.to_image),
                         remotepaths=extra_log,
                         recurse=True)
                    for extra_log in self.extra_logs
                ],
            ))

    # creating these as critical = True means the whole
    # scenario will fail if these are not found
    for binary in self.expected_binaries:
        check_with = "ls" if os.path.isabs(binary) else "type -p"
        sequence.append(
            Sequence(
                PrintJob(
                    "Checking for expected binaries",
                    # label="message about checking"
                ),
                SshJob(
                    node=node_proxy,
                    command=[check_with, binary],
                    # label="Checking for {}".format(binary)
                )))
    # xxx some flag
    if no_save:
        sequence.append(
            PrintJob("fast-track: skipping image save", banner=banner))
    else:
        sequence.append(
            Sequence(
                PrintJob("saving image {} ...".format(self.to_image),
                         banner=banner),
                # make sure we capture all the logs and all that
                # mostly to test RunString
                SshJob(
                    node=node_proxy,
                    command=RunString("sync ; sleep $1; sync; sleep $1", 1),
                    # label='sync',
                ),
                SshJob(
                    node=gateway_proxy,
                    command=Run("rhubarbe", "save", "-o", self.to_image, nodename),
                    # label="save image {}".format(self.to_image),
                ),
                SshJob(
                    node=gateway_proxy,
                    command="rhubarbe images -d",
                    # label="list current images",
                ),
            ))

    sched = Scheduler(sequence, verbose=verbose)
    # sanitizing for the cases where some pieces are left out
    sched.sanitize()

    print(20 * '+', "before run")
    sched.list(details=verbose)
    print(20 * 'x')
    if sched.orchestrate():
        if verbose:
            print(20 * '+', "after run")
            sched.list()
            print(20 * 'x')
        print("image {} OK".format(self.to_image))
        return True
    else:
        print("Something went wrong with image {}".format(self.to_image))
        print(20 * '+', "after run - KO")
        sched.debrief()
        print(20 * 'x')
        return False
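# For reference, the run() method above relies on names that are imported
# elsewhere in its module; the list below is a best-effort reconstruction
# (module paths follow recent apssh / asynciojobs releases and may differ
# in the codebase this excerpt comes from):
#
#   import os
#   from asynciojobs import Scheduler, Sequence, PrintJob
#   from apssh import SshNode, LocalNode, SshJob, Run, RunScript, RunString, Push, Pull
#   from apssh.formatters import ColonFormatter
#   from apssh.keys import load_agent_keys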
# a convenient way to create many jobs in a single pass is
# to build a list of jobs using a python comprehension
turn_on_datas = [
    SshJob(
        node=node,
        command=Run("turn-on-data"),
        required=push_job,
        scheduler=scheduler,
    ) for node in nodes
]

########## next: run a sender on node1 and a receiver on node2

# start the receiver - this of course returns immediately
SshJob(
    node=node2,
    commands=[
        RunString(receiver_manager_script, "start",
                  netcat_port, "RANDOM",
                  remote_name="receiver-manager"),
    ],
    required=turn_on_datas,
    scheduler=scheduler,
)

transfer_job = Sequence(
    # start the sender
    SshJob(
        node=node1,
        # ignore netcat result
        critical=False,
        commands=[
            # let the server warm up just in case
            Run("sleep 1"),
def main(argv):
    if len(argv) == 3:
        print("!! Unfinished routines !!")
    else:
        print("++ Using default settings ++")

    ###########################
    ## Local Variables
    # platform = 'multiGPU'
    platform = 'distributed'
    gateway_user = '******'
    gateway_host = 'gw_host'
    node_username = '******'

    #########################################################
    ## Distributed Requirements
    num_ps = 1
    num_workers = 2

    #########################################################
    gateway = SshNode(gateway_host, username=gateway_user)

    ##########################################################
    if platform == 'multiGPU':
        ...  # multi-GPU setup not shown in this excerpt
    elif platform == 'distributed':
        ## Jetson-TX2 Cluster
        hosts = [cluster_ip_host]

    #########################################################
    ## Use the Server node for processing the first stage: data mining
    server = ResourceManager._set_Node(master_host, master_user, gateway,)

    ############################
    # Push the launch file (run_splitpoint)
    # with the parameters configuration on the server
    # to execute the first stage on this host
    job_launch_S1 = SshJob(
        node=server,
        commands=[
            ## Run the script located on the laptop
            RunScript("run_dataspworkers_mlp.sh", platform, num_ps, num_workers),
            Run("echo Split Data DONE"),
        ],
    )

    #############################
    ## A collection of the PS nodes
    ps = []
    [ps.append(ResourceManager._set_Node(hosts[i], node_username, gateway,))
     for i in range(num_ps)]

    #############################
    ## A collection of the worker nodes
    workers = []
    [workers.append(ResourceManager._set_Node(hosts[num_ps + i], node_username, gateway,))
     for i in range(num_workers)]

    #########################################################
    ## Setting parameters for the first stage
    FEATURES_NAME = "FULL-W1_x1_x2_x3_x4_x5_x7_x8_Y1"
    SANDBOX = str("/data_B/datasets/drg-PACA/healthData/sandbox-" + FEATURES_NAME)
    YEAR = str(2008)

    ## Stage 1
    # localdir = "/1_Mining-Stage/"
    # SP_Dir_X = str(SANDBOX+localdir+"BPPR-"+FEATURES_NAME+"-"+YEAR)

    #############################
    ## Setting parameters for the second stage
    S_PLOINT = str(3072)    # 1536
    # SP_ARGV = str(S_PLOINT+"-"+platform)
    SP_ARGV = platform + "-" + str(num_workers)
    SP2 = str(SANDBOX + "/2_Split-Point-" + SP_ARGV + "/")

    #############################
    ## BPPR directories
    dir_train = "/data_training/"
    dir_valid = "/data_valid/"
    dir_test = "/data_test/"

    ############################
    ## Worker data management
    worker_healthData = "/opt/diagnosenet/healthData/"
    worker_sandbox = str(worker_healthData + "/sandbox-" + FEATURES_NAME)
    worker_splitpoint = str(worker_sandbox + "/2_Split-Point-" + SP_ARGV + "/")
    worker_train = str(worker_splitpoint + dir_train)
    worker_valid = str(worker_splitpoint + dir_valid)
    worker_test = str(worker_splitpoint + dir_test)

    ############################
    ## Worker commands
    mkd_worker_sandbox = str("mkdir" + " " + worker_sandbox)
    mkd_worker_splitpoint = str("mkdir" + " " + worker_splitpoint)
    mkd_worker_train = str("mkdir" + " " + worker_train)
    mkd_worker_valid = str("mkdir" + " " + worker_valid)
    mkd_worker_test = str("mkdir" + " " + worker_test)

    #############################
    ## Create a JOB to build the sandbox for each worker
    job_build_sandbox = []
    [job_build_sandbox.append(SshJob(
        node=workers[i],
        commands=[
            RunString(mkd_worker_sandbox),
            RunString(mkd_worker_splitpoint),
            RunString(mkd_worker_train),
            RunString(mkd_worker_valid),
            RunString(mkd_worker_test),
            Run("echo SANDBOX ON WORKER DONE"),
        ],
    )) for i in range(len(workers))]

    #############################
    ## Create the commands to transfer data
    scp = "scp"
    cmd_X_train_transfer = []
    cmd_y_train_transfer = []
    cmd_X_valid_transfer = []
    cmd_y_valid_transfer = []
    cmd_X_test_transfer = []
    cmd_y_test_transfer = []
    for i in range(num_workers):
        worker_host = str(node_username + "@" + hosts[num_ps + i] + ":")
        num_file = str(i + 1)

        ## Commands to transfer the training dataset
        X_train_splitted = str(SP2 + dir_train + "X_training-" + FEATURES_NAME + "-" + YEAR + "-" + num_file + ".txt")
        cmd_X_train_transfer.append(str(scp + " " + X_train_splitted + " " + worker_host + worker_train))
        y_train_splitted = str(SP2 + dir_train + "y_training-" + FEATURES_NAME + "-" + YEAR + "-" + num_file + ".txt")
        cmd_y_train_transfer.append(str(scp + " " + y_train_splitted + " " + worker_host + worker_train))

        ## Commands to transfer the validation dataset
        X_valid_splitted = str(SP2 + dir_valid + "X_valid-" + FEATURES_NAME + "-" + YEAR + "-" + num_file + ".txt")
        cmd_X_valid_transfer.append(str(scp + " " + X_valid_splitted + " " + worker_host + worker_valid))
        y_valid_splitted = str(SP2 + dir_valid + "y_valid-" + FEATURES_NAME + "-" + YEAR + "-" + num_file + ".txt")
        cmd_y_valid_transfer.append(str(scp + " " + y_valid_splitted + " " + worker_host + worker_valid))

        ## Commands to transfer the test dataset
        X_test_splitted = str(SP2 + dir_test + "X_test-" + FEATURES_NAME + "-" + YEAR + "-" + num_file + ".txt")
        cmd_X_test_transfer.append(str(scp + " " + X_test_splitted + " " + worker_host + worker_test))
        y_test_splitted = str(SP2 + dir_test + "y_test-" + FEATURES_NAME + "-" + YEAR + "-" + num_file + ".txt")
        cmd_y_test_transfer.append(str(scp + " " + y_test_splitted + " " + worker_host + worker_test))

    ############################
    ## Build a JOB for transferring data to each worker sandbox
    job_data_transfer = []
    [job_data_transfer.append(SshJob(
        node=server,
        commands=[
            RunString(cmd_X_train_transfer[i]),
            RunString(cmd_y_train_transfer[i]),
            Run("echo SENDER TRAINING DATA DONE"),
            RunString(cmd_X_valid_transfer[i]),
            RunString(cmd_y_valid_transfer[i]),
            Run("echo SENDER VALID DATA DONE"),
            RunString(cmd_X_test_transfer[i]),
            RunString(cmd_y_test_transfer[i]),
            Run("echo SENDER TEST DATA DONE"),
        ],)
    ) for i in range(len(workers))]

    #########################################################
    ## Create a sequence orchestration scheduler instance upfront
    worker_seq = []

    ## Add the Stage-1 JOB into the Scheduler
    worker_seq.append(Scheduler(Sequence(
        job_launch_S1)))

    ## Add the worker JOBs into the Scheduler
    [worker_seq.append(Scheduler(Sequence(
        job_build_sandbox[i],
        job_data_transfer[i],
    ))) for i in range(len(workers))]

    #############################
    ## Old method
    ## Add the JOB PS Replicas into Scheduler
    # worker_seq.append(Scheduler(Sequence(
    #     job_PS_replicas)))
    #
    ## Add the JOB WORKER Replicas into Scheduler
    # worker_seq.append(Scheduler(Sequence(
    #     job_WORKER_replicas)))

    #############################
    ## Run the Sequence JOBS
    # [seq.orchestrate() for seq in worker_seq]

    #########################################################
    #########################################################
    ## Push the launch file (run_secondstage_distributed)
    ## with the distributed parameters for each worker replica
    ## to run distributed training of the unsupervised embedding

    #############################
    ## Build a collection of TensorFlow hosts for the PS nodes
    tf_ps = []
    [tf_ps.append(str(hosts[i] + ":2222")) for i in range(num_ps)]
    # print("+++ tf_ps: {}".format(tf_ps))
    tf_ps = ','.join(tf_ps)

    #############################
    ## Build a collection of TensorFlow hosts for the workers
    tf_workers = []
    [tf_workers.append(str(hosts[num_ps + i] + ":2222")) for i in range(num_workers)]
    # print("+++ tf_workers: {}".format(tf_workers))
    tf_workers = ','.join(tf_workers)

    job_PS_replicas = []
    [job_PS_replicas.append(SshJob(
        node=ps[i],
        commands=[
            ## Launches local script to execute on cluster
            # RunScript("run_secondstage_distributed.sh",
            #           platform, tf_ps,
            #           tf_workers,
            #           num_ps, num_workers, "ps", i),
            RunScript("run_thirdstage_distributed_mlp.sh",
                      platform, tf_ps, tf_workers,
                      num_ps, num_workers, "ps", i),
            Run("echo PS REPLICA DONE"),
        ],)
    ) for i in range(len(ps))]

    job_WORKER_replicas = []
    [job_WORKER_replicas.append(SshJob(
        node=workers[i],
        commands=[
            ## Launches local script to execute on cluster
            # RunScript("run_secondstage_distributed.sh",
            #           platform, tf_ps, tf_workers,
            #           num_ps, num_workers, "worker", i),
            RunScript("run_thirdstage_distributed_mlp.sh",
                      platform, tf_ps, tf_workers,
                      num_ps, num_workers, "worker", i),
            Run("echo WORKER REPLICA DONE"),
        ],
    )) for i in range(len(workers))]

    #############################
    ### Simultaneous jobs
    s_distraining = Scheduler()
    [s_distraining.add(job_PS_replicas[i]) for i in range(len(ps))]
    [s_distraining.add(job_WORKER_replicas[i]) for i in range(len(workers))]

    s_distraining.run(jobs_window=int(num_ps + num_workers + 1))
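    # Side note on job_build_sandbox earlier in this function (editor's sketch,
    # not in the original script): every command there is a one-liner, so
    # RunString -- which ships a whole script to the node before executing it --
    # is heavier than needed; plain Run() with 'mkdir -p' expresses the same
    # thing more directly:
    #
    #   job_build_sandbox = [
    #       SshJob(node=worker,
    #              commands=[
    #                  Run("mkdir", "-p",
    #                      worker_sandbox, worker_splitpoint,
    #                      worker_train, worker_valid, worker_test),
    #                  Run("echo SANDBOX ON WORKER DONE"),
    #              ])
    #       for worker in workers
    #   ]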
iw dev $ifname set type ibss
ip link set $ifname up
# set to ad-hoc mode
iw dev $ifname ibss join $netname $freq
ip address add $ipaddr_mask dev $ifname
"""

##########
# setting up the wireless interface on both fit01 and fit02
init_node_01 = SshJob(
    node=node1,
    command=RunString(
        turn_on_wireless_script,
        wireless_driver, "foobar", 2412,
        # setting a remote_name improves
        # the graphical rendering
        remote_name='turn-on-wireless',
        # verbose=True,
    ),
    required=check_lease,
    scheduler=scheduler,
)
init_node_02 = SshJob(
    node=node2,
    command=RunString(turn_on_wireless_script,
                      wireless_driver, "foobar", 2412,
                      remote_name='turn-on-wireless'),
    required=check_lease,