def setup_server(server, key):
    """Provision a new server and register it in ``all_servers.txt``.

    Recreates ``~/server_config`` on the remote host, uploads every entry of
    ``SERVER_CONFIG_DIR`` into it, runs ``setup_new_server.sh`` remotely and
    finally rewrites the project's ``all_servers.txt`` as a sorted,
    de-duplicated host list that includes ``server``.

    Parameters:
        server: hostname or address of the machine to set up
        key: private key handed to the SSH client and to the file uploader
    """
    client = ParallelSSHClient([server], user='******', pkey=key, allow_agent=False)

    # Start from an empty remote configuration directory.
    for remote_cmd in ('rm -rf ~/server_config', 'mkdir ~/server_config'):
        utils.print_pssh_output(client.run_command(remote_cmd))

    for entry in os.listdir(SERVER_CONFIG_DIR):
        dest_path = get_server_config_path(entry.split('/')[-1])
        local_path = os.path.join(SERVER_CONFIG_DIR, entry)
        fileUpload.upload_file_no_pssh([server], key, local_path, dest_path)
        print(f'Wrote file {local_path} to {server}')

    utils.print_pssh_output(
        client.run_command('bash ~/server_config/setup_new_server.sh'))

    servers_file = utils.get_project_path('all_servers.txt')
    with open(servers_file) as f:
        # Skip blank lines: otherwise an empty "hostname" ends up in the set
        # and is written back as a blank first line of the file.
        hostnames = {line.strip() for line in f if line.strip()}
    hostnames.add(server)
    with open(servers_file, 'w') as f:
        f.write('\n'.join(sorted(hostnames)) + '\n')
    print(
        f"Added {server} to the fleet. Don't forget to commit autogenerated changes to all_servers.txt."
    )
def tryssh(url):
    """Probe ``url`` over SSH by issuing ``ls``.

    Returns:
        ``True`` when the attempt fails with an authentication error,
        ``False`` when it fails with a connection error, and ``None`` when
        neither exception is raised.
    """
    # NOTE(review): a successful command yields None (falsy), unlike the
    # authentication-failure case -- confirm callers expect this.
    try:
        ParallelSSHClient([url], user="******").run_command('ls')
    except pssh.exceptions.AuthenticationException:
        return True
    except pssh.exceptions.ConnectionErrorException:
        return False
    return None
def ssh_all_hosts(self, command):
    """
    Runs one command on every address in ``self.addresses`` via ssh and
    waits for it to finish.

    Parameters:
        command: command to be sent over ssh
    """
    remote = ParallelSSHClient(self.addresses, user='******')
    remote.run_command(command)
    # Block until the command has completed on all hosts.
    remote.join()
def go():
    """Prompt for a password, run ``uname -a`` on localhost and print the output.

    Each stdout line is printed prefixed with the node it came from.
    """
    pw = getpass.getpass()
    client = ParallelSSHClient(['localhost'], password=pw, timeout=3)
    client.run_command('uname -a')
    results = client.get_last_output()
    for node in results:
        for line in results[node]['stdout']:
            print(f'{node} {line}')
async def ssh(ctx, host, user, password, command):
    """Run ``command`` over SSH and send each host's stdout to ``ctx``.

    The stdout lines of every host output are joined with newlines and
    passed to ``ctx.send`` as one message per host.
    """
    client = ParallelSSHClient(host, user=user, password=password)
    for host_output in client.run_command(command):
        message = "\n".join(host_output.stdout)
        await ctx.send(message)
        # Read after stdout has been consumed; the value is currently unused.
        exit_code = host_output.exit_code
def command(self, command, timeout=60, sudo=False):
    """Run ``command`` over ssh on every instance in ``self.hosts``.

    Parameters:
        command: the command line to execute
        timeout: forwarded to the client as ``read_timeout``
        sudo: forwarded to the client's ``sudo`` option

    Returns the client's output after joining on all hosts.
    """
    logger.debug(f'Executing {command} with sudo {sudo}.')
    addresses = [instance.ip for instance in self.hosts]
    client = ParallelSSHClient(addresses, pkey=self.keysfile)
    result = client.run_command(command, read_timeout=timeout, sudo=sudo)
    client.join()
    return result
def shell_cmd(cloud_name, machine_id, cmd):
    """Run ``cmd`` on one machine over SSH and print its stdout.

    The machine is looked up with ``read(cloud_name, machine_id)``; its
    ``key_name`` and ``public_ip`` are taken from the returned description
    and the login credentials from ``key.read``.  An error is reported via
    ``util.message`` and the function returns early when either the node or
    the key cannot be found.

    Parameters:
        cloud_name: cloud identifier passed to ``read``
        machine_id: machine identifier passed to ``read``
        cmd: command line to execute remotely (with a pty, 1h read timeout)
    """
    from pssh.clients import ParallelSSHClient

    ##util.message("# " + str(cmd), "info")

    # Only the fifth element of the lookup result is used here.
    _, _, _, _, describe, _, _ = read(cloud_name, machine_id)
    if describe is None:
        util.message("Node " + str(machine_id) + " not found", "error")
        return

    key_name = describe['key_name']
    hosts = describe['public_ip'].split()

    username, pkey = key.read(key_name)
    if username is None:
        util.message("key file not found", "error")
        return

    ##util.message("host=" + str(hosts) + ", user="******", pkey=" + str(pkey), "info")

    client = ParallelSSHClient(hosts, user=username, pkey=pkey)
    output = client.run_command(cmd, use_pty=True, read_timeout=3600)
    for host_out in output:
        try:
            for line in host_out.stdout:
                print(line)
        except Exception:
            # Best effort: pause briefly and move on to the next host.
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit still
            # interrupt a long-running command.
            time.sleep(3)
            continue

    return
def test_agent_forwarding(self):
    """Run ``self.cmd`` on localhost with agent forwarding and check stdout."""
    client = ParallelSSHClient(['localhost'], forward_ssh_agent=True, port=self.port)
    output = client.run_command(self.cmd)
    # One stdout listing is collected per entry in the output.
    stdout = []
    for _ in output:
        stdout.append(list(output['localhost'].stdout))
    self.assertListEqual(stdout, [[self.resp]])
def fix_hostnames():
    """Set a numbered hostname on every host returned by ``get_ips()``.

    Host ``i`` (by position) is renamed ``synthetic-bot-up-<i>`` through a
    per-host command argument, then the call waits for all hosts.
    """
    hosts = get_ips()
    client = ParallelSSHClient(hosts, user=USER)
    host_args = []
    for index in range(len(hosts)):
        rename = "sudo hostnamectl set-hostname synthetic-bot-up-%s" % (index, )
        host_args.append({"cmd": rename})
    client.run_command("%(cmd)s", host_args=host_args)
    client.join()
def ssh(hosts, command):
    """Run ``command`` on ``hosts``, print stdout and dump the raw output.

    Every stdout line of every host is printed, then ``str(output)`` is
    written to the file ``Computer_Parseble_OUTPUT`` in the current
    directory.

    Parameters:
        hosts: hosts handed to ``ParallelSSHClient``
        command: command line to execute on each host
    """
    client = ParallelSSHClient(hosts, user='******', password='******')
    output = client.run_command(command)
    for host, host_output in output.items():
        for line in host_output.stdout:
            # Python 3 print function (the Python 2 statement form is a
            # syntax error on Python 3).
            print(line)
    # Context manager guarantees the file is closed even if the write fails.
    with open("Computer_Parseble_OUTPUT", "w") as f:
        f.write(str(output))
def send(cmd, user=USER, pause=0):
    """Send ``cmd`` to every server returned by ``get_servers()``.

    With ``pause == 0`` the command is issued to all hosts in parallel and
    its output is not read.  Otherwise hosts are contacted one at a time:
    stdout is printed and the function sleeps ``pause`` seconds after each
    host.
    """
    hosts = [droplet.ip_address for droplet in get_servers()]

    if pause == 0:
        output = ParallelSSHClient(hosts, user=user).run_command(cmd)
        return

    for host in hosts:
        result = SSHClient(host, user=user).run_command(cmd)
        for line in result.stdout:
            print(line)
        exit_code = result.exit_code
        time.sleep(pause)
def display_images(self, images):
    """
    Shows images on the remote screens via ssh.

    The displays are switched off, one ``feh`` viewer per host is started
    in the background with the first three resolved image paths, and after
    one second the displays are switched back on.

    Parameters:
        images: images to be displayed on the screens
    """
    default_names = ('all_white_screen.png', 'fixation_stimuli.png',
                     'all_black_screen.png')
    new_images = [
        f'/home/pi/elephant_vending_machine/default_img/{image}'
        if image in default_names
        else f"{self.config['REMOTE_IMAGE_DIRECTORY']}/{image}"
        for image in images
    ]

    client = ParallelSSHClient(self.addresses, user='******')
    self.ssh_all_hosts('xset -display :0 dpms force off')
    # Exactly three per-host commands, one for each of the first three images.
    feh_commands = tuple(
        f'DISPLAY=:0 feh -F -x -Y {new_images[slot]} &' for slot in range(3))
    client.run_command('%s', host_args=feh_commands)
    time.sleep(1)
    self.ssh_all_hosts('xset -display :0 dpms force on')
def send(cmd, pause=0, user=USER):
    """ Send a command to all bots.

    With ``pause == 0`` the command goes to every host from ``get_ips()``
    in parallel; otherwise hosts are contacted one by one with a sleep of
    ``pause`` seconds after each.  Command output is not read.
    """
    hosts = get_ips()

    if pause == 0:
        output = ParallelSSHClient(hosts, user=user).run_command(cmd)
        return

    for host in hosts:
        output = SSHClient(host, user=user).run_command(cmd)
        time.sleep(pause)
class SessionRunner:
    """Builds sysbench command lines and runs them on the configured hosts."""

    def __init__(self, config, hosts):
        """Read the host list from ``config`` (section ``system``, key ``hosts``).

        The value is parsed as JSON.  When it is a mapping it is kept as
        ``host_config`` and its keys become the host list.
        """
        # NOTE(review): the ``hosts`` argument is not used; the host list
        # always comes from ``config``.
        self.config = config
        parsed = json.loads(self.config.get("system", "hosts"))
        self.hosts = parsed
        if isinstance(parsed, dict):
            self.host_config = parsed
            self.hosts = parsed.keys()
        self.command_output = None
        self.output = None
        self.prepare_fileio = True
        self.test_type = None

    def connect_ssh(self, retries=6, user="******"):
        """Create ``self.client``, using per-host config when one was loaded."""
        if hasattr(self, "host_config"):
            auth = {"host_config": self.host_config}
        else:
            auth = {"user": user}
        self.client = ParallelSSHClient(
            self.hosts, num_retries=retries, timeout=10000, **auth)

    def run_command(self):
        """Run ``self.command`` (if set) on all hosts and collect the output.

        Connection problems are printed rather than raised.
        """
        if not hasattr(self, "client"):
            self.connect_ssh()
        if not hasattr(self, "command"):
            return
        try:
            self.command_output = self.client.run_command(self.command)
        except UnknownHostException as e:
            print(f"Unknown Host {e.host} - {e.args[2]}")
        except Exception as e:
            print(e)
        self.get_output()

    def get_output(self):
        """Convert ``self.command_output`` into plain dicts in ``self.output``."""
        self.output = {}
        if not self.command_output:
            return
        for host, host_output in self.command_output.items():
            stdout_lines = list(host_output.stdout)
            stderr_text = "\n".join(list(host_output.stderr))
            self.output[host] = {
                "command": self.command,
                "stdout": stdout_lines,
                "stderr": stderr_text,
                "exit_code": host_output.exit_code,
            }

    def make_command(self, *args, cmd="run", run=True, threads=None):
        """Assemble the sysbench command line and optionally run it.

        The command is ``sysbench <test_type> <args> <config params>
        --threads=<threads or $(nproc)> <cmd>``.
        """
        thread_flag = f"--threads={threads or '$(nproc)'}"
        pieces = list(args)
        pieces.extend(get_params(self.config, self.test_type, cmd))
        self.command = " ".join(
            ["sysbench", self.test_type] + pieces + [thread_flag, cmd])
        if run:
            self.run_command()
def ssh(host_list, cmd, **kargs):
    """Run ``cmd`` on ``host_list`` and log every host's stdout.

    Extra keyword arguments are forwarded to ``run_command``.  Any failure
    while connecting or reading output is logged and swallowed (best
    effort).

    Parameters:
        host_list: hosts handed to ``ParallelSSHClient``
        cmd: command line to execute
        **kargs: forwarded to ``ParallelSSHClient.run_command``
    """
    try:
        client = ParallelSSHClient(host_list, user='******', pkey=f'{sq(args.ssh_key)}')
        output = client.run_command(cmd, **kargs)
        for host in output:
            logger.info(host)
            for line in output[host]['stdout']:
                logger.info(line)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        logger.info('cannot connect to all the hosts')
    return
def login_with_pool(self, ipaddress, port, user_passwd_pair_list, pool_size=10):
    """Try each (user, password) pair against one SSH endpoint.

    A pair counts as successful when ``whoami`` can be issued without an
    exception; successes are reported through ``log_success``.  Failures
    are logged at debug level and the next pair is tried.
    """
    for user_passwd_pair in user_passwd_pair_list:
        username, secret = user_passwd_pair[0], user_passwd_pair[1]
        try:
            client = ParallelSSHClient(
                hosts=[ipaddress],
                port=port,
                user=username,
                password=secret,
                num_retries=0,
                timeout=self.timeout,
                pool_size=pool_size,
            )
            client.run_command('whoami', timeout=self.timeout)
            log_success("SSH", ipaddress, port, user_passwd_pair)
        except Exception:
            logger.debug('AuthenticationException: ssh')
def _exec_ssh(self, cmd):
    """Run ``cmd`` on ``self.hosts`` and return the output as plain lists.

    Returns:
        dict mapping each host to ``{'stdout': [...], 'stderr': [...]}``.
    """
    client = ParallelSSHClient(
        self.hosts,
        user=self.username,
        pkey=self.pkey,
        password=self.password,
        port=self.port,
    )
    results = client.run_command(cmd, sudo=self.sudo)
    return {
        host_output.host: {
            'stdout': list(host_output.stdout),
            'stderr': list(host_output.stderr),
        }
        for host_output in results
    }
def login(self, ipaddress, port, user_passwd_pair_list):
    """Try each (user, password) pair against one SSH endpoint.

    The ``parallel`` logger is raised to CRITICAL first.  A pair counts as
    successful when ``whoami`` can be issued without an exception;
    successes go to ``log_success`` and failures are logged at debug level.
    """
    logging.getLogger("parallel").setLevel('CRITICAL')
    for user_passwd_pair in user_passwd_pair_list:
        username, secret = user_passwd_pair[0], user_passwd_pair[1]
        try:
            client = ParallelSSHClient(
                hosts=[ipaddress],
                port=port,
                user=username,
                password=secret,
                num_retries=0,
                timeout=self.timeout,
            )
            client.run_command('whoami', timeout=self.timeout)
            log_success("SSH", ipaddress, port, user_passwd_pair)
        except Exception:
            logger.debug('AuthenticationException: ssh')
def parallel_ssh(host, user, password, command):
    """Run ``command`` on ``host`` and return its stdout as one string.

    Every stdout line is terminated with a newline in the result.

    Raises:
        Exception: when a host reports a non-zero exit code.
    """
    #enable_host_logger()
    client = ParallelSSHClient([host], user=user, password=password)
    results = client.run_command(command)
    client.join()

    collected = []
    for host_output in results:
        if host_output.exit_code != 0:
            raise Exception("host returned exit code " + str(host_output.exit_code))
        collected.extend(line + "\n" for line in list(host_output.stdout))
    return "".join(collected)
def condor_retire(self):
    """Retire the instances from condor.

    Running jobs keep going, but the instances stop accepting new ones.
    For every host whose name appears in the condor status, a peaceful
    ``condor_off -startd`` command is issued with sudo.
    """
    status = self.condor_status()
    # Map each known host name to the full host name reported by condor.
    full_names = {
        host.name: entry['fullhost']
        for host in self.hosts
        for entry in status
        if entry['host'] == host.name
    }
    commandlist = [
        f"condor_off -startd -peaceful {full_name}"
        for full_name in full_names.values()
    ]
    logger.debug(commandlist)
    # One 'server' target per command so each command is sent once.
    targets = ['server'] * len(commandlist)
    client = ParallelSSHClient(targets, pkey=self.keysfile)
    output = client.run_command('%s', host_args=commandlist, sudo=True)
def login_with_pool_key(self, ipaddress, port, user_passwd_pair_list, key_file_path_list, pool_size=10):
    """Try every key file with every user name against one SSH endpoint.

    Only the user name of each pair is used.  A combination counts as
    successful when ``whoami`` can be issued without an exception;
    successes go to ``log_success`` and failures are logged at debug level.
    """
    for key_file_path in key_file_path_list:
        for user_passwd_pair in user_passwd_pair_list:
            username = user_passwd_pair[0]
            try:
                client = ParallelSSHClient(
                    hosts=[ipaddress],
                    port=port,
                    user=username,
                    pkey=key_file_path,
                    num_retries=0,
                    timeout=self.timeout,
                    pool_size=pool_size,
                )
                client.run_command('whoami', timeout=self.timeout)
                log_success("SSH", ipaddress, port,
                            [username, "key: {}".format(key_file_path)])
            except Exception:
                logger.debug('AuthenticationException: ssh')
def run_command(command, hosts, user, verbose=False, proxy_host=None, timeout=10, **kwargs):
    """Run ssh command using Parallel SSH.

    Returns:
        dict with key ``"0"`` listing hosts whose exit code was 0 and key
        ``"1"`` listing all other hosts, including hosts that produced no
        usable output entry.
    """
    succeeded = []
    failed = []

    if proxy_host:
        client = ParallelSSHClient(hosts, user='******', pkey=SSH_KEY,
                                   proxy_host=proxy_host, proxy_user=user,
                                   proxy_pkey=SSH_KEY, timeout=timeout)
    else:
        client = ParallelSSHClient(hosts, user=user, pkey=SSH_KEY, timeout=timeout)

    output = client.run_command(command, stop_on_errors=False, **kwargs)
    client.join(output)

    # output = pssh.output.HostOutput objects list
    for host_output in output:
        if host_output.exit_code == 0:
            if verbose and host_output.stdout:
                for line in host_output.stdout:
                    print(line)
            succeeded.append(host_output.host)
        elif host_output.host is not None:
            failed.append(host_output.host)

    # find hosts that have raised Exception (Authentication, Connection)
    # host.exception = pssh.exceptions.* & host.host = None
    unaccounted = list(set(hosts) - set(succeeded + failed))
    if unaccounted:
        failed.extend(unaccounted)
    return {"0": succeeded, "1": failed}
def run_command(command, hosts, user, verbose=False, proxy_host=None, timeout=10, **kwargs):
    """Run ssh command using Parallel SSH.

    Returns:
        dict with key ``"0"`` listing hosts whose exit code was 0 and key
        ``"1"`` listing the others.

    Raises:
        OpenA8SshAuthenticationException: when a requested host has no
            entry in the output.
    """
    if proxy_host:
        client = ParallelSSHClient(hosts, user='******', proxy_host=proxy_host,
                                   proxy_user=user, timeout=timeout)
    else:
        client = ParallelSSHClient(hosts, user=user, timeout=timeout)

    output = client.run_command(command, stop_on_errors=False, **kwargs)
    client.join(output)

    result = {"0": [], "1": []}
    for host in hosts:
        if host not in output:
            # Pssh AuthenticationException duplicate output dict key
            # {'saclay.iot-lab.info': {'exception': ...},
            # {'saclay.iot-lab.info_qzhtyxlt': {'exception': ...}}
            site = next(iter(sorted(output)))
            raise OpenA8SshAuthenticationException(site)
        bucket = '0' if output[host]['exit_code'] == 0 else '1'
        result[bucket].append(host)

    if not verbose:
        return result

    for host in hosts:
        # Pssh >= 1.0.0: stdout is None instead of generator object
        # when you have ConnectionErrorException
        stdout = output[host].get('stdout')
        if stdout:
            # Drain the stream without using the lines.
            for _ in stdout:
                pass
    return result
# Report the output of the preceding download step, host by host.
print("Downloading Script Files...")
for host, host_output in output.items():
    for line in host_output.stdout:
        print("Host [%s] - %s" % (host, line))

# Work through the S3 files in batches of NUM_INSTANCES, one file per host.
while len(s3_file_list) > 0:
    print(f"------------------ Start: {str(datetime.now())}-----------------")
    batch = s3_file_list[:NUM_INSTANCES]
    s3_file_list = s3_file_list[NUM_INSTANCES:]
    s3_proc_list = [get_proc_image_command(item) for item in batch]
    # Pad a short final batch with empty commands so every host gets one.
    s3_proc_list.extend([''] * (NUM_INSTANCES - len(s3_proc_list)))
    output = client.run_command('%s', host_args=tuple(s3_proc_list))
    for command in s3_proc_list:
        print(f"Running: {command}")
    # client.join(output)
    print(f"------------------ End: {str(datetime.now())}-----------------")
    for host, host_output in output.items():
        for line in host_output.stdout:
            print("Host [%s] - %s" % (host, line))

for inst in instance_list:
    inst.terminate_instance()

if DOWNLOAD_DIR:
    os.system(f"aws s3 cp --recursive {S3_OUTPUT_DIR} {DOWNLOAD_DIR}")
#!/usr/bin/env python
'''
Simple SSH c2, load & parse hosts.csv
IP,user,password
execute commands with sudo and print to STDOUT
'''
import sys
from pssh.clients import ParallelSSHClient

# The command to run is the first command-line argument.
cmd = sys.argv[1]
host_lst = 'hosts.csv'

with open(host_lst, 'r') as infile:
    data = infile.readlines()

# Each row is expected to be "IP,user,password".
host_lst = [host.strip().split(',') for host in data]

host_config = {}
for host in host_lst:
    try:
        host_config[host[0]] = {'user': host[1], 'password': host[2]}
    except IndexError:
        # Rows with fewer than three fields (e.g. blank lines) are skipped.
        # Narrowed from a bare ``except`` so unrelated errors and Ctrl-C
        # are no longer silently swallowed.
        pass

hosts = host_config.keys()

client = ParallelSSHClient(hosts, host_config=host_config)

output = client.run_command(cmd, sudo=True)
for host, host_output in output.items():
    for line in host_output.stdout:
        print(host, line)
from pssh.utils import load_private_key
from pssh.clients import ParallelSSHClient

# Per-host connection settings, keyed by address.
host_config = {
    '192.168.56.104': {
        'user': '******',
        'password': '******',
        'port': 22,
    }
}
hosts = list(host_config.keys())

# The host list is repeated four times so that each of the four per-host
# commands below is sent once.
per_host_commands = ["uptime", "whoami", "date", "uname -a"]
client = ParallelSSHClient(hosts * 4, host_config=host_config)
output = client.run_command('%s', host_args=per_host_commands)

for host, host_output in output.items():
    for line in host_output.stdout:
        print(host, line)
class HPCConnection(object):
    """SSH helper that stages data on a remote host and drives batch jobs there.

    Connection parameters (host name, user, working directory, template
    path, batch commands) come from the ``constants`` module.  The local
    data path and the SSH key file may be overridden through the dict
    given to the constructor.
    """

    def __init__(self, external_init_dict=None):
        """Read the configuration and open the SSH client.

        Parameters:
            external_init_dict: optional dict; the keys ``src_data_path``
                and ``ssh_key_filename`` are honoured when present.
        """
        self.logger = logging.getLogger(constants.logging_name)
        init_dict = {}
        clsname = self.__class__.__name__
        if external_init_dict is not None:
            self.logger.debug(
                "{}: initializing from external dict".format(clsname))
            init_dict = external_init_dict
        else:
            self.logger.debug(
                "{}: initializing with default values".format(clsname))

        self.hostname = constants.hpc_hostname
        self.user = constants.user
        self.home_dir = os.path.join(constants.cc_working_dir, self.user)
        self.src_data_path = init_dict.get("src_data_path", "./data")
        self.template_path = constants.template_path
        self.logger.debug("Host being used is {}, under username {}".format(
            self.hostname, self.user))
        self.keypath = init_dict.get("ssh_key_filename",
                                     constants.ssh_key_filename)
        self.client = ParallelSSHClient([self.hostname],
                                        pkey=self.keypath,
                                        user=self.user,
                                        keepalive_seconds=300)
        self.remote_abs_working_folder = None
        self.remote_working_folder = None
        self.active_dataset_name = None
        self.live_job_id = None

    def check_connection(self):
        """Issue ``ls`` remotely to check that the connection works.

        Returns:
            ``(True, None)`` on success, otherwise ``(False, message)`` for
            authentication, unknown-host and connection errors.
        """
        status = True
        msg = None
        self.logger.debug("Testing connection...")
        try:
            self.client.run_command("ls")
            self.logger.debug("... ok")
        except (
                AuthenticationException,
                UnknownHostException,
                ConnectionErrorException,
        ) as e:
            status = False
            msg = str(e)
            self.logger.debug("... failed ({})".format(msg))

        return status, msg

    def copy_data_to_remote(self, dataset_, remote_temp_folder=None):
        """
        Copies data contained in a local directory over to a remote destination.

        The dataset directory is tarred into ``/tmp``, sent with scp into a
        new folder under ``self.home_dir`` and unpacked there.  The remote
        working folder and dataset name are remembered on the instance.

        Parameters:
            dataset_: name of the dataset directory under ``src_data_path``
            remote_temp_folder: remote folder name; a random name is
                generated when omitted

        Raises:
            Exception: when the copy fails with a non-SCP error or the
                remote untar writes to stderr.
        """
        self.logger.debug(
            "Copying data to remote location (from {} to {})".format(
                self.src_data_path, self.home_dir))
        remote_base_path = self.home_dir
        local_datapath = self.src_data_path
        if remote_temp_folder is None:
            remote_temp_folder = rand_fname()

        full_remote_path = os.path.join(remote_base_path, remote_temp_folder)
        remote_tar = os.path.join(full_remote_path, "data.tar")
        self.remote_abs_working_folder = full_remote_path
        self.remote_working_folder = remote_temp_folder
        self.active_dataset_name = dataset_
        self.logger.debug("Creating remote folder {}".format(full_remote_path))
        self.client.run_command("mkdir " + full_remote_path)

        df_basename = dataset_
        local_tar = "/tmp/" + remote_temp_folder + ".tar"
        self.logger.debug("system cmd: tar cvf /tmp/{}.tar -C {} .".format(
            remote_temp_folder, os.path.join(local_datapath, df_basename)))
        os.system("tar cf " + local_tar + " -C " +
                  os.path.join(local_datapath, df_basename) + " .")
        try:
            self.logger.debug("Copying data tar file")
            g = self.client.scp_send(local_tar, remote_tar)
            joinall(g, raise_error=True)
        except SCPError as e:
            # SCP errors are only logged; execution continues.
            self.logger.error("Copy failed (scp error {})".format(e))
        except Exception as e:
            self.logger.error("Copy failed: {}".format(e))
            raise Exception("scp_send failed")

        s = "tar xvf " + remote_tar + " -C " + full_remote_path
        self.logger.debug("Untarring remote data: {}".format(s))
        output = self.client.run_command(s)
        self.client.join(output)

        errmsg = next(output[self.hostname]["stderr"], None)
        if errmsg is not None:
            self.logger.error("Error: " + errmsg)
            raise Exception("Error untarring data file: " + errmsg)

        errmsg = next(output[self.hostname]["stdout"], None)
        if errmsg is not None:
            self.logger.debug("stdout: " + errmsg)

        self.logger.debug("Remove remote temp tar file " + local_tar)
        os.remove(local_tar)

    # output files in base_dir/jobname/out
    def copy_data_from_remote(self,
                              jobid,
                              absolute_local_out_dir,
                              cleanup_temp=True):
        """Fetch the job's ``out`` folder and unpack it locally.

        The slurm output file is copied into the remote ``out`` folder,
        the folder is tarred remotely, and the tar is downloaded (up to
        three attempts, compared by size) and unpacked into
        ``absolute_local_out_dir``.  Any exception is logged, not raised.

        Parameters:
            jobid: job id used to locate ``slurm-<jobid>.out``
            absolute_local_out_dir: local destination, created if missing
            cleanup_temp: remove the downloaded tar file afterwards
        """
        self.logger.debug("Copying data from remote")
        absolute_tar_fname = os.path.join(
            self.remote_abs_working_folder,
            self.remote_working_folder + "_out.tar")
        absolute_output_data_path = os.path.join(
            self.remote_abs_working_folder, "out")
        stdout_file = os.path.join(self.home_dir, "slurm-" + jobid + ".out")
        self.logger.debug(
            "  Remote data is located in {}".format(absolute_output_data_path))
        self.logger.debug("  Slurm output file is {}".format(stdout_file))

        try:
            self.logger.debug(
                "  Copying slurm file to {}".format(absolute_output_data_path))
            output = self.client.run_command("cp " + stdout_file + " " +
                                             absolute_output_data_path)
            self.client.join(output)
            self.logger.debug(output)
            self.logger.debug("  Tarring remote folder")
            output = self.client.run_command("tar cf " + absolute_tar_fname +
                                             " -C " +
                                             absolute_output_data_path + " .")
            self.client.join(output)
            self.logger.debug(output)
            # time.sleep(30)  # patch since run_command sems non-blocking
            self.logger.debug("Picking up tar file size")
            output = self.client.run_command("du -sb " + absolute_tar_fname)
            self.client.join(output)
            self.logger.debug(output)
            line = ""
            for char in output[self.hostname].stdout:
                line += char
            # The leading digits of the ``du`` output are the size in bytes.
            tar_size = int(re.match("[0-9]*", line).group(0))

            self.logger.info("{} bytes to copy from remote".format(tar_size))
            local_tar = "/tmp/" + self.remote_working_folder + "_out.tar"
            # The downloaded file carries a "_<hostname>" suffix.
            received_tar = local_tar + "_" + self.hostname
            self.logger.debug(
                "Remote tar file is {}".format(absolute_tar_fname))

            tries = 0
            while tries < 3:
                self.logger.debug("Copying tar file to /tmp")
                g = self.client.copy_remote_file(absolute_tar_fname,
                                                 local_tar)  # scp_recv
                joinall(g, raise_error=True)

                output = subprocess.check_output("du -sb " + received_tar,
                                                 shell=True)
                recv_tar_size = int(
                    re.match("[0-9]*", output.decode("utf-8")).group(0))

                self.logger.debug("Received: {} bytes".format(recv_tar_size))
                if recv_tar_size == tar_size:
                    break
                tries += 1

            if tries == 3:
                raise Exception("Unable to copy tar file from remote end")

            if not os.path.exists(absolute_local_out_dir):
                self.logger.debug(
                    "Local destination folder {} does not exist, creating".
                    format(absolute_local_out_dir))
                os.mkdir(absolute_local_out_dir)

            self.logger.debug(
                "Untarring received file to {}".format(absolute_local_out_dir))
            os.system("tar xf " + received_tar + " -C " +
                      absolute_local_out_dir)
            if cleanup_temp:
                os.remove(received_tar)

        except Exception as e:
            self.logger.error(
                "Exception during file transfer from remote: {}".format(e))

    def copy_singlefile_to_remote(self,
                                  local_filename,
                                  remote_path=".",
                                  is_executable=False):
        """Copy one local file into the remote working folder.

        Parameters:
            local_filename: file to send; its base name is kept remotely
            remote_path: sub-path inside the remote working folder
            is_executable: when true, ``chmod ugo+x`` the remote copy
        """
        r = os.path.join(
            self.remote_abs_working_folder,
            remote_path,
            os.path.basename(local_filename),
        )
        g = self.client.copy_file(local_filename, r)
        joinall(g, raise_error=True)
        if is_executable:
            self.client.run_command("chmod ugo+x " + r)

    def create_remote_subdir(self, remote_subdir):
        """Create ``remote_subdir`` in the remote working folder with mode 777."""
        target = os.path.join(self.remote_abs_working_folder, remote_subdir)
        self.client.run_command("mkdir -p " + target)
        self.client.run_command("chmod 777 " + target)

    # executable_ is either raven or ostrich
    def copy_batchscript(
            self,
            executable_,
            guessed_duration,
            datafile_basename,
            batch_tmplt_fname,
            shub_hostname,
    ):
        """Fill in the batch-script template and upload it.

        Placeholders in the template are substituted, the script is
        written to ``/tmp``, copied into the remote working folder, made
        executable, and the local copy is removed.

        Returns:
            The remote path of the uploaded script.
        """
        abs_remote_output_dir = os.path.join(self.remote_abs_working_folder,
                                             "out")
        # Context manager so the template handle is always closed.
        with open(os.path.join(self.template_path, batch_tmplt_fname),
                  "r") as template_file:
            tmplt = template_file.read()

        # Substitution order matters and is kept as originally written.
        substitutions = (
            ("ACCOUNT", constants.cc_account_info),
            ("DURATION", guessed_duration),
            ("TEMP_PATH", self.remote_abs_working_folder),
            ("INPUT_PATH", self.remote_abs_working_folder),
            ("OUTPUT_PATH", abs_remote_output_dir),
            ("DATAFILE_BASENAME", datafile_basename),
            ("SHUB_HOSTNAME", shub_hostname),
            ("EXEC", executable_),
        )
        for placeholder, value in substitutions:
            tmplt = tmplt.replace(placeholder, value)

        subst_fname = self.remote_working_folder + ".sh"
        local_script = "/tmp/" + subst_fname
        remote_script = os.path.join(self.remote_abs_working_folder,
                                     subst_fname)
        with open(local_script, "w") as file:
            file.write(tmplt)

        self.client.run_command("mkdir " + abs_remote_output_dir)
        self.client.run_command("chmod 777 " + self.remote_abs_working_folder)
        self.client.run_command("chmod 777 " + abs_remote_output_dir)
        g = self.client.copy_file(local_script, remote_script)
        joinall(g, raise_error=True)
        self.client.run_command("chmod ugo+x " + remote_script)
        os.remove(local_script)

        return remote_script

    def submit_job(self, script_fname):
        """Submit ``script_fname`` with the configured sbatch command.

        Returns:
            The first stdout line of the submission, stored as
            ``self.live_job_id``.

        Raises:
            Exception: when the submission writes anything to stderr.
        """
        self.logger.debug("Submitting job {}".format(script_fname))
        output = self.client.run_command("cd {}; {} --parsable {}".format(
            self.home_dir, constants.sbatch_cmd, script_fname))
        self.client.join(output)
        errmsg = next(output[self.hostname]["stderr"], None)

        if errmsg is not None:
            for e in output[self.hostname]["stderr"]:
                errmsg += e + "\n"

            self.logger.error("  Error: {}".format(errmsg))
            raise Exception("Error: " + errmsg)

        self.live_job_id = next(output[self.hostname]["stdout"])
        self.logger.debug("  Job id {}".format(self.live_job_id))

        return self.live_job_id

    def read_from_remote(self, remote_filename):
        """Download a file from the remote working folder and return its lines.

        The copy is attempted at most twice (one retry after any
        exception).  Returns an empty list when both attempts fail.
        """
        filecontent = []
        self.logger.debug("read_from_remote")
        retry = True

        # maybe remote file is being overwritten, try again if remote copy fails
        while True:
            try:
                local_filename = os.path.join(
                    "/tmp", self.remote_working_folder + "_progress.json")
                g = self.client.copy_remote_file(
                    os.path.join(self.remote_abs_working_folder,
                                 remote_filename),
                    local_filename,
                )
                joinall(g, raise_error=True)
                suffixed_local_filename = local_filename + "_" + self.hostname
                self.logger.debug("  Opening copied file")
                with open(suffixed_local_filename) as f:
                    for line in f:
                        self.logger.debug(line)
                        filecontent.append(line)
                break
            except Exception as e:
                if retry:
                    # e.g. missing progress file as execution starts
                    self.logger.debug("exception {}, retrying".format(e))
                    retry = False
                else:
                    break

        self.logger.debug("End read_from_remote")
        return filecontent

    def get_status(self, jobid):
        """Query the job state with the configured squeue command.

        :param jobid: job id to look up
        :return: one of COMPLETED, PENDING, RUNNING, TIMEOUT, CANCELLED
        :raises Exception: on stderr output, unparsable output or an
            unknown state
        """
        self.logger.debug("Inside get_status: executing sacct")
        cmd = constants.squeue_cmd + " -j {} -n -p -b".format(jobid)

        output = self.client.run_command(cmd)
        self.client.join(output)
        status_output = None  # 1 line expected

        errmsg = next(output[self.hostname]["stderr"], None)
        if errmsg is not None:
            for e in output[self.hostname]["stderr"]:
                errmsg += e + "\n"
            self.logger.debug("  stderr: {}".format(errmsg))
            raise Exception("Error: " + errmsg)

        stdout_str = ""
        for line in output[self.hostname]["stdout"]:  # errmsg is None
            stdout_str += line + "\n"
            fields = line.split("|")
            # Expected shape: "<jobid>|<state ...>|..."
            if len(fields) >= 2 and fields[0] == jobid:
                status_output = fields[1].split()[0]

        if status_output is None:
            raise Exception(
                "Error parsing sacct output: {}".format(stdout_str))

        if status_output not in [
                "COMPLETED",
                "PENDING",
                "RUNNING",
                "TIMEOUT",
                "CANCELLED",
        ]:
            raise Exception(
                "Status error: state {} unknown".format(status_output))

        return status_output

    def cancel_job(self, jobid):
        """Cancel a job with the configured scancel command.

        :param jobid: job id to cancel
        :raises Exception: when the command writes to stderr or stdout
        """
        cmd = constants.scancel_cmd + " {}".format(jobid)

        output = self.client.run_command(cmd)
        self.client.join(output)
        errmsg = next(output[self.hostname]["stderr"], None)
        if errmsg is not None:
            for e in output[self.hostname]["stderr"]:
                errmsg += e + "\n"
            self.logger.debug("  stderr: {}".format(errmsg))
            raise Exception("Cancel error: " + errmsg)

        stdout_str = ""
        for line in output[self.hostname]["stdout"]:  # errmsg is None
            stdout_str += line + "\n"
        if len(stdout_str) > 0:
            raise Exception("Cancel error: " + stdout_str)

    def reconnect(self):
        """Replace ``self.client`` with a freshly created SSH client."""
        self.client = ParallelSSHClient([self.hostname],
                                        pkey=self.keypath,
                                        user=self.user,
                                        keepalive_seconds=300)

    """
    def check_slurmoutput_for(self, substr, jobid):

            slurmfname = "slurm-" + jobid + ".out"
            local_slurmfname = os.path.join("/tmp", slurmfname)
            stdout_file = os.path.join(self.home_dir, slurmfname)
            found = False
            try:
                g = self.client.copy_remote_file(stdout_file, local_slurmfname)
                joinall(g, raise_error=True)
                # scan file for substr
                with open(local_slurmfname + "_" + self.hostname) as f:
                    for line in f:
                        print("comparing {} with {}".format(substr,line))
                        match_obj = re.search(substr, line)
                        print(match_obj)
                        if match_obj:
                            found = True
                            print("found")

                os.remove(local_slurmfname + "_" + self.hostname)

            except Exception as e:
                print("Exception inside check_slurmoutput_for")
                print(e)
                pass

            return found
    """

    def cleanup(self, jobid):
        """Remove the remote working folder, the slurm log and the local progress file.

        Failures are logged at debug level and never raised.

        Parameters:
            jobid: job id used to locate ``slurm-<jobid>.out``
        """
        try:
            self.logger.debug("Deleting the remote folder")
            output1 = self.client.run_command("rm -rf {}".format(
                os.path.join(self.home_dir, self.remote_abs_working_folder)))
            self.logger.debug("Deleting the slurm log file")
            logfilepath = os.path.join(self.home_dir,
                                       "slurm-{}.out".format(jobid))
            output2 = self.client.run_command("rm {}".format(logfilepath))
            self.logger.debug("Deleting the local progress file")
            local_filename = os.path.join(
                "/tmp", self.remote_working_folder + "_progress.json")
            suffixed_local_filename = local_filename + "_" + self.hostname
            os.remove(suffixed_local_filename)

            # Log the first line of each stream, if any.  A default is
            # passed to next() because a silent ``rm`` leaves the streams
            # empty, and the resulting StopIteration used to be reported as
            # a cleanup failure.
            for stream in ("stdout", "stderr"):
                for output in (output1, output2):
                    first_line = next(output[self.hostname][stream], None)
                    if first_line is not None:
                        self.logger.debug(first_line)
        except Exception as e:
            self.logger.debug("Hmm file cleanup failed: {}".format(e))
from pssh.clients import ParallelSSHClient

# List /usr/local/src on both hosts and print each line with its origin.
hosts = ['192.168.56.101', "localhost"]
client = ParallelSSHClient(hosts, user="******", password="******")

output = client.run_command('ls /usr/local/src')
for host, host_output in output.items():
    for line in host_output.stdout:
        print("Host [{}] - {}".format(host, line))
from pssh.clients import ParallelSSHClient, SSHClient

# Run ``uname`` on localhost, echo its stdout, then report the exit code.
hosts = ['localhost']
cmd = 'uname'

client = ParallelSSHClient(hosts)
output = client.run_command(cmd)

for host_out in output:
    for line in host_out.stdout:
        print(line)
    print("Host {}: exit code {}".format(host_out.host, host_out.exit_code))
class SSHManager(object):
    """Wraps a ``ParallelSSHClient`` and tracks which hosts are in scope.

    ``all_hosts`` is the sorted list of every managed host; ``hosts`` is
    the currently selected subset (the "context").  While no context has
    been selected, ``hosts`` mirrors ``all_hosts``.
    """

    # True once a context has been selected with change_context_*().
    context_changed = False

    def __init__(self, hosts, host_config):
        """Create the client for ``hosts`` (stored sorted) with ``host_config``."""
        self.hosts = sorted(hosts)
        self.all_hosts = copy.deepcopy(self.hosts)
        self.client = ParallelSSHClient(self.hosts, host_config=host_config)

    def remove_hosts(self, hosts):
        """Drop ``hosts`` from the managed list and from the client.

        The client's ``host_config`` entries at the removed positions are
        dropped as well, so configs stay aligned with ``all_hosts``.
        """
        removed_indices = {
            i for i, name in enumerate(self.all_hosts) if name in hosts
        }
        self.all_hosts = [
            name for name in self.all_hosts if name not in hosts
        ]
        if self.context_changed:
            self.hosts = [name for name in self.hosts if name not in hosts]
        else:
            self.hosts = copy.deepcopy(self.all_hosts)
        self.client.host_config = [
            cfg for i, cfg in enumerate(self.client.host_config)
            if i not in removed_indices
        ]
        self.client.hosts = self.all_hosts

    def add_host(self, host):
        """Add ``host`` and insert its config at the matching sorted position.

        ``host`` must provide ``.host`` and ``.build_host_config()``.
        """
        self.all_hosts.append(host.host)
        self.all_hosts = sorted(set(self.all_hosts))
        host_configs = self.client.host_config
        idx = self.all_hosts.index(host.host)
        host_configs.insert(idx, host.build_host_config())
        self.client.hosts = self.all_hosts
        self.client.host_config = host_configs
        if not self.context_changed:
            self.hosts = copy.deepcopy(self.all_hosts)

    def run_command(self, command, commands=None, sudo=False):
        """Run ``command`` on the client's hosts.

        When ``commands`` is given it is passed as per-host arguments
        (``host_args``) for ``command``.
        """
        if commands is None:
            return self.client.run_command(command, sudo=sudo)
        return self.client.run_command(command,
                                       host_args=commands,
                                       sudo=sudo)

    def join(self, output):
        """Wait for the commands behind ``output`` to finish."""
        self.client.join(output)

    def change_context_hosts_all(self):
        """Select every managed host as the current context."""
        self.change_context_hosts(self.all_hosts)

    def change_context_hosts(self, new_hosts):
        """Select ``new_hosts`` as the current context.

        Raises:
            SSHManager.ContextException: when a host is not managed.
        """
        new_hosts = sorted(set(new_hosts))
        for h in new_hosts:
            if h not in self.all_hosts:
                raise SSHManager.ContextException(f"Host {h} not in host list")
        self.hosts = [h for h in self.all_hosts if h in new_hosts]
        self.context_changed = True

    def change_context_indices(self, indices):
        """Select the hosts at ``indices`` (positions in ``all_hosts``).

        Raises:
            SSHManager.ContextException: when an index is out of range.
        """
        indices = sorted(indices)
        if indices[0] < 0 or indices[-1] >= len(self.all_hosts):
            raise SSHManager.ContextException("Indices out of range")
        self.change_context_hosts([self.all_hosts[i] for i in indices])

    def reset_context(self):
        """Go back to addressing every managed host."""
        self.hosts = copy.deepcopy(self.all_hosts)
        self.context_changed = False

    @staticmethod
    def build_host_config(*,
                          n=0,
                          users=None,
                          passwords=None,
                          user=None,
                          password=None):
        """Build ``n`` ``HostConfig`` objects.

        Exactly one of ``users``/``user`` and exactly one of
        ``passwords``/``password`` must be given.  The plural forms supply
        one value per host (length ``n``); the singular forms are shared by
        all hosts.

        Raises:
            HostConfigException: when the arguments are inconsistent.
        """
        if (users is None) == (user is None):
            raise HostConfigException(
                "Users or User (not both) needs to be defined")
        elif (passwords is None) == (password is None):
            raise HostConfigException(
                "Passwords or Password (not both) needs to be defined")
        elif passwords is not None and len(passwords) != n:
            raise HostConfigException("Length of Passwords != n")
        elif users is not None and len(users) != n:
            raise HostConfigException("Length of Users != n")
        elif n <= 0:
            raise HostConfigException("n should be greater than 0")

        if user is None and password is None:
            return [
                HostConfig(user=users[i], password=passwords[i])
                for i in range(n)
            ]
        elif user is None:
            return [
                HostConfig(user=users[i], password=password) for i in range(n)
            ]
        elif password is None:
            # Shared user, per-host passwords.  This branch previously
            # indexed ``password`` (which is None here) instead of
            # ``passwords``.
            return [
                HostConfig(user=user, password=passwords[i])
                for i in range(n)
            ]
        else:
            return [HostConfig(user=user, password=password) for i in range(n)]

    class ContextException(Exception):
        """Raised when a requested host context is invalid."""
        pass