def _run_on_all_workaround(self, cmd, group_size, **run_kwargs):
    total_conns = len(self._worker_conns) + 1
    print(f'{total_conns} Nodes')

    # Chunk the worker connections into ThreadingGroups of at most group_size
    groups = []
    group_conns = []
    for i, worker_conn in enumerate(self._individual_worker_conns):
        if i % group_size == 0 and i != 0:
            groups.append(ThreadingGroup.from_connections(group_conns))
            group_conns = []
        group_conns.append(worker_conn)

    flattened_results = []
    # Either add the master to the last, partially filled group, or run it
    # separately (if all groups are full or there are no workers)
    if len(group_conns) != 0 and len(group_conns) != group_size:
        group_conns.append(self._master_conn)
        groups.append(ThreadingGroup.from_connections(group_conns))
    else:
        if len(group_conns) != 0:
            groups.append(ThreadingGroup.from_connections(group_conns))
        master_result = self.run_on_master(cmd, **run_kwargs)
        flattened_results.append(master_result)

    for worker_conn_group in groups:
        group_results = worker_conn_group.run(cmd, **run_kwargs)
        flattened_results.extend(group_results.values())
    return flattened_results
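A hedged usage sketch of the chunked-group workaround above. The `Cluster` class name is assumed (a matching constructor appears later in this section), and the IPs and key path are placeholders:

# Usage sketch; `Cluster`, the IPs, and the key path are illustrative only.
cluster = Cluster(username="ubuntu",
                  master_ip="10.0.0.1",
                  worker_ips=["10.0.0.2", "10.0.0.3", "10.0.0.4"],
                  ssh_key_path="~/.ssh/key.pem")

# Run `hostname` in batches of two connections per ThreadingGroup
results = cluster._run_on_all_workaround("hostname", group_size=2, hide=True)
for result in results:
    print(result.connection.host, result.stdout.strip())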
def tg_deploy():
    '''
    ThreadingGroup(*hosts, **kwargs): runs the command on all hosts
    concurrently; if one host cannot be reached, the remaining hosts
    still execute.
    :return:
    '''
    with ThreadingGroup(*hosts) as g:
        g.run('uname -s')
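A short sketch of how the partial results promised by the docstring can be recovered: when any host fails, ThreadingGroup.run() raises GroupException, and the exception's .result still holds the per-host outcomes. The host list here is illustrative; 198.51.100.1 stands in for an unreachable node.

from fabric import ThreadingGroup
from fabric.exceptions import GroupException

group = ThreadingGroup('192.168.19.128', '198.51.100.1')
try:
    results = group.run('uname -s', hide=True)
except GroupException as exc:
    results = exc.result  # GroupResult with both successes and failures

for conn, outcome in results.items():
    if hasattr(outcome, 'stdout'):
        print(conn.host, outcome.stdout.strip())
    else:
        print(conn.host, 'failed:', outcome)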
from fabric import ThreadingGroup
from fabric import exceptions
from fabric.runners import Result
from paramiko import ssh_exception

def getTempByHostThreaded(hosts, tempByHost):
    if len(hosts) > 0:
        groupResult = None
        with ThreadingGroup(*hosts, user=userName, connect_timeout=10) as grp:
            try:
                groupResult = grp.run(tempCommand, hide=True, timeout=5)
            except exceptions.GroupException as res:
                groupResult = res.args[0]
                print(f"Got GroupException running command: {res}")
            except ssh_exception.NoValidConnectionsError as res:
                groupResult = res.args[0]
                print(f"Got NoValidConnectionsError running command: {res}")
            except ssh_exception.SSHException as res:
                groupResult = res.args[0]
                print(f"Got SSH exception running command: {res}")
        if groupResult:
            # successful = groupResult.succeeded
            for conn in groupResult:
                result = groupResult[conn]
                if isinstance(result, Result):  # Only process successful commands
                    tempStr = result.stdout.strip('\n')
                    # print(f"Remote result: parsing str {tempStr} with formatter {tempResultFormat}")
                    out = tempResultParser.parse(tempStr)
                    if out:
                        tempByHost[conn.host] = out[0]
        else:
            print(f"Failed to run command: {groupResult}")
    return tempByHost
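The snippet leaves userName, tempCommand, and tempResultParser undefined. One plausible setup, assuming the third-party `parse` package's compiled-pattern API; the user name, command, and temperature format are assumptions (vcgencmd output shown is Raspberry Pi style):

import parse  # third-party `parse` package

userName = "pi"
# `vcgencmd measure_temp` prints lines like: temp=48.3'C (assumed here)
tempCommand = "vcgencmd measure_temp"
tempResultFormat = "temp={:g}'C"
tempResultParser = parse.compile(tempResultFormat)

tempByHost = getTempByHostThreaded(["pi1.local", "pi2.local"], {})
print(tempByHost)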
from fabric import Group

def group_run(pool, cmd_list, **kwargs):
    GroupResult_list = _append_GroupResult([], pool, 'hostname', **kwargs)
    #pool_work = Group.from_connections([c for c in pool if type(GroupResult_list[0].get(c)) == fabric.runners.Result])
    pool_work = Group.from_connections([c for c in pool if c.is_connected])
    return _append_GroupResult(GroupResult_list, pool_work, cmd_list, **kwargs)
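_append_GroupResult is not shown in this snippet. A minimal sketch of what it might do, assuming it runs each command on the group and keeps the GroupResult even on partial failure (a hypothetical reconstruction, not the project's actual helper):

from fabric.exceptions import GroupException

def _append_GroupResult(results, group, cmd_list, **kwargs):
    # Hypothetical reconstruction: run each command on `group` and keep
    # the GroupResult even when some hosts fail, so callers can inspect
    # partial output. `cmd_list` may be one command or a list of them.
    if isinstance(cmd_list, str):
        cmd_list = [cmd_list]
    for cmd in cmd_list:
        try:
            results.append(group.run(cmd, **kwargs))
        except GroupException as exc:
            results.append(exc.result)
    return results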
import os
import select
import subprocess
import sys
import tempfile
from multiprocessing.pool import ThreadPool

from fabric import ThreadingGroup
from fabric.exceptions import GroupException


class BaseSystem(object):
    HS_INSTALL_PATH = "/mnt/data/pdaqlocal/HsInterface"
    HS_SOURCE_PATH = "/home/pdaq/HsInterface/current"

    DEFAULT_BWLIMIT = 1000
    DEFAULT_LOG_FORMAT = "%i%n%L"

    RSYNC_EXCLUDE_LIST = (".svn", ".hg", ".hgignore", "*.log", "*.pyc",
                          "*.swp")

    CRONTAB_TEXT = """
###HitSpool Jobs -- Please contact [email protected] for questions
HitSpoolPath=/mnt/data/pdaqlocal/HsInterface/current
15 * * * * python \$HitSpoolPath/HsWatcher.py
@reboot python \$HitSpoolPath/HsWatcher.py
"""

    def __init__(self, targets=None):
        self.__group = ThreadingGroup(*targets)

    def __create_exclude_file(self):
        """
        Create a file containing patterns of files/directories which
        `rsync` should ignore
        """
        tmp_fd, tmpname = tempfile.mkstemp(text=True)
        # os.write() requires bytes in Python 3
        os.write(tmp_fd, "\n".join(self.RSYNC_EXCLUDE_LIST).encode("utf-8"))
        os.close(tmp_fd)
        return tmpname

    def __get_hs_name(self, hostname):
        if hostname == "2ndbuild":
            return "HsSender"
        if hostname == "expcont":
            return "HsPublisher"
        if "hub" in hostname or hostname == "scube":
            return "HsWorker"
        raise Exception("Unknown HitSpool host \"%s\"" % hostname)

    def __report_failures(self, results, action):
        if not results.failed:
            return 0

        fail_list = None
        for conn, rslt in results.failed.items():
            if hasattr(rslt, "stdout"):
                print("%s ERROR from %s: %s" %
                      (action, conn.host, rslt.stdout.rstrip()),
                      file=sys.stderr)
            else:
                # handle partial results wrapped in a GroupException
                if fail_list is None:
                    fail_list = []
                fail_list.append(conn.host)

        if fail_list is not None:
            print("%s FAILED on %s" % (action, ", ".join(fail_list)),
                  file=sys.stderr)

        return len(results.failed)

    def __rsync_to_remote_host(self, cmdfmt, hostname=None):
        """
        Copy the current HitSpool release to a single remote host
        """
        # hack around the missing 'ThreadPool.starmap()'
        if hostname is None and isinstance(cmdfmt, (tuple, list)):
            hostname = cmdfmt[1]
            cmdfmt = cmdfmt[0]

        return self.__run_command(hostname, cmdfmt % hostname)

    def __run_command(self, hostname, cmd):
        """
        Run a local command, print any output, and return the process's
        return code (non-zero indicates an error)
        """
        proc = subprocess.Popen(cmd, bufsize=256, shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)

        try:
            num_err = 0
            while True:
                reads = [proc.stdout.fileno(), proc.stderr.fileno()]
                try:
                    ret = select.select(reads, [], [])
                except select.error:
                    # ignore a single interrupt
                    if num_err > 0:
                        break
                    num_err += 1
                    continue

                for fin in ret[0]:
                    if fin == proc.stderr.fileno():
                        line = proc.stderr.readline().decode("utf-8")
                        if line != "":
                            print("%s ERROR: %s" % (hostname, line),
                                  file=sys.stderr)
                        continue

                    if fin != proc.stdout.fileno():
                        # ignore unknown file descriptors
                        continue

                    line = proc.stdout.readline().decode("utf-8")
                    if line != "":
                        print("[%s] %s" % (hostname, line))

                if proc.poll() is not None:
                    break
        finally:
            proc.stdout.close()
            proc.stderr.close()

        return proc.wait()

    def modify_crontab(self, delete=False, debug=False, quiet=False):
        delete_cmd = "egrep -v 'HitSpool|HSiface|HsWatcher|^$'"
        if delete:
            verb = "delete"
            past_verb = "deleted"
            cmd = "crontab -l | %s | crontab -" % (delete_cmd, )
        else:
            verb = "install"
            past_verb = "installed"
            cmd = "(crontab -l | %s; echo \"%s\") | crontab -" % \
                (delete_cmd, self.CRONTAB_TEXT.strip(), )

        try:
            results = self.__group.run(cmd, hide=quiet)
        except GroupException as gex:
            if str(gex).rstrip() != "":
                print("Failed to %s crontab: %s" % (verb, gex))
            results = gex.result

        num_failed = self.__report_failures(results,
                                            "%sCrontab" % verb.capitalize())
        if not quiet:
            print("%s crontab on %s hosts" %
                  (past_verb.capitalize(), len(self.__group) - num_failed))

    def make_directories(self, debug=False, quiet=False):
        """
        Create HitSpool directories on all remote systems
        """
        for subdir in "current", "logs":
            path = os.path.join(self.HS_INSTALL_PATH, subdir)
            results = self.__group.run("mkdir -p %s" % path, hide=True)
            num_failed = self.__report_failures(results, "MakeDirs")
            if not quiet:
                print("Created '%s' on %s hosts" %
                      (path, len(self.__group) - num_failed))

    def report_status(self):
        results = self.__group.run("ps axwww", hide=True)
        num_failed = self.__report_failures(results, "ProcessList")

        active = []
        inactive = []
        for conn, rslt in sorted(results.items(), key=lambda x: x[0].host):
            hostname = conn.host
            outlines = rslt.stdout
            target = self.__get_hs_name(hostname)
            if target in outlines:
                active.append(hostname)
            else:
                inactive.append(hostname)

        if len(self.__group) == len(active):
            print("All hosts are active")
        elif len(self.__group) == len(inactive):
            print("All hosts are INACTIVE")
        else:
            if len(active) > 0:
                print("Active: %s" % ", ".join(active))
            if len(inactive) > 0:
                print("Inactive: %s" % ", ".join(inactive))

    def rsync_install(self, bwlimit=None, no_relative=False, debug=False,
                      quiet=False):
        if bwlimit is None:
            bwlimit = self.DEFAULT_BWLIMIT

        exclude_file = self.__create_exclude_file()
        try:
            rmtpath = os.path.join(self.HS_INSTALL_PATH, "current")
            cmdfmt = "nice rsync -a --bwlimit=%s%s --exclude-from=%s" \
                     " %s/." \
                     " \"%%s:%s\"" % \
                     (bwlimit, " --no-relative" if no_relative else "",
                      exclude_file, self.HS_SOURCE_PATH, rmtpath)

            # build argument lists for all hosts
            params = [(cmdfmt, entry.host) for entry in self.__group]

            # run rsync on all remote hosts
            pool = ThreadPool()
            results = pool.map(self.__rsync_to_remote_host, params)

            synced = None
            for idx, rtncode in enumerate(results):
                hostname = params[idx][1]
                if rtncode != 0:
                    print("WARNING: %s exited with non-zero return code %s" %
                          (hostname, rtncode), file=sys.stderr)
                else:
                    if synced is None:
                        synced = []
                    synced.append(hostname)

            if not quiet and synced is not None:
                print("Deployed to %d hosts" % len(synced))
        finally:
            os.unlink(exclude_file)

    def run_watcher(self, stop_process=False, debug=False, quiet=False):
        watcher = os.path.join(self.HS_INSTALL_PATH, "current",
                               "HsWatcher.py")
        stop_arg = " -k" if stop_process else ""
        try:
            results = self.__group.run("python %s%s" % (watcher, stop_arg),
                                       hide=True)
        except GroupException as gex:
            if str(gex).rstrip() != "":
                print("Failed to %s: %s" %
                      ("stop" if stop_process else "start", gex))
            results = gex.result

        num_failed = self.__report_failures(
            results, "Stop" if stop_process else "Start")
        if not quiet and num_failed < len(self.__group):
            verb = "Stopped" if stop_process else "Started"
            print("%s HitSpool daemon on %s hosts" %
                  (verb, len(self.__group) - num_failed))

        return num_failed == 0
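A hedged usage sketch of the class above; the host names here are placeholders invented for illustration, not from the original project:

# Usage sketch (host names are placeholders)
targets = ["2ndbuild", "expcont", "ichub01", "ichub02"]
system = BaseSystem(targets=targets)

system.make_directories(quiet=False)   # create .../current and .../logs
system.rsync_install(bwlimit=500)      # push the release to every host
system.modify_crontab()                # install the HsWatcher cron jobs
system.report_status()                 # check which daemons are running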
def __init__(self, username, master_ip, worker_ips, ssh_key_path,
             use_bastion=False, wait_for_ssh=True,
             wait_for_ssh_timeout=120):
    """
    Args:
        username: The username used to ssh to the instance. Often 'ubuntu'
            or 'ec2-user'
        master_ip: A single IP for the master node. Typically should be
            the public IP if the location this code is running is outside
            of the VPC and the private IP if running from another EC2 node
            in the same VPC. In many cases, the distinction between master
            and workers is arbitrary. If use_bastion is True, the master
            node will be the bastion host.
        worker_ips: A possibly empty list of ips for the worker nodes. If
            there is only a single worker, a string can be passed in
            instead of a list.
        ssh_key_path: The path to the SSH key required to SSH into the EC2
            instances. Often ~/.ssh/something.pem
        use_bastion (bool): Whether or not to use the master node as the
            bastion host for SSHing to worker nodes.
        wait_for_ssh (bool): If true, block until commands can be run on
            all instances. This can be useful when you are launching EC2
            instances, because the instances may be in the RUNNING state
            but the SSH daemon may not yet be running.
        wait_for_ssh_timeout: Number of seconds to spend trying to run
            commands on the instances before failing. This is NOT the SSH
            timeout; this upper bounds the amount of time spent retrying
            failed SSH connections. Only used if wait_for_ssh=True.
    """
    if not isinstance(worker_ips, list):
        worker_ips = [worker_ips]

    self._username = username
    self._master_ip = master_ip
    self._worker_ips = worker_ips
    self._all_ips = [self._master_ip] + self._worker_ips
    self.use_bastion = use_bastion

    connect_kwargs = {
        "key_filename": [os.path.expanduser(ssh_key_path)],
        "banner_timeout": 30  # NOTE 1 above
    }

    self._master_conn = Connection(user=self._username,
                                   host=self._master_ip,
                                   forward_agent=True,
                                   connect_kwargs=connect_kwargs)

    worker_conns = []
    for worker_ip in self._worker_ips:
        if self.use_bastion:
            c = Connection(user=self._username, host=worker_ip,
                           connect_kwargs=connect_kwargs,
                           gateway=Connection(user=self._username,
                                              host=master_ip,
                                              forward_agent=True,
                                              connect_kwargs=connect_kwargs))
        else:
            c = Connection(user=self._username, host=worker_ip,
                           connect_kwargs=connect_kwargs)
        worker_conns.append(c)

    self._individual_worker_conns = worker_conns
    self._worker_conns = ThreadingGroup.from_connections(worker_conns)
    self._all_conns = ThreadingGroup.from_connections([self._master_conn] +
                                                      worker_conns)

    if wait_for_ssh:
        self.wait_for_ssh_ready(wait_timeout=wait_for_ssh_timeout)
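wait_for_ssh_ready is referenced but not shown. A plausible sketch, assuming it simply retries a trivial command on every connection until the timeout elapses (a hypothetical reconstruction, not the project's actual method):

import time

def wait_for_ssh_ready(self, wait_timeout=120):
    # Hypothetical reconstruction: retry a no-op command on all nodes
    # until it succeeds everywhere or wait_timeout seconds elapse.
    deadline = time.time() + wait_timeout
    while True:
        try:
            self._all_conns.run("true", hide=True)
            return
        except Exception:
            if time.time() > deadline:
                raise RuntimeError(
                    "SSH not ready on all nodes after %s seconds"
                    % wait_timeout)
            time.sleep(5)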
from fabric2 import Config          # SSH configuration
from fabric2 import Connection      # connection
from fabric2 import ThreadingGroup  # multithreaded execution
from fabric2 import SerialGroup     # single-threaded execution
from invoke import task             # @task annotation
from invoke import Responder

#con = Connection('[email protected]', connect_kwargs = { 'key_filename': 'id_rsa'})

# Specify the SSH config file
# Config.ssh_config_path = 'ssh_config'

hosts = ('192.168.19.128', '192.168.19.129')

# SingleThread Run
print('--- SingleThread Run ---')
for host in hosts:
    con = Connection(host)
    print(con.host)
    con.run('hostname')

# SingleThread Group Run
print('--- SingleThread Group Run ---')
result = SerialGroup('192.168.19.128', '192.168.19.129').run('hostname')

# MultiThread Group Run
print('--- MultiThread Group Run ---')
result = ThreadingGroup('192.168.19.128', '192.168.19.129').run('hostname')
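Both group runs return a GroupResult, a dict mapping each Connection to its Result. A short follow-on sketch for inspecting the per-host output of the ThreadingGroup run above:

# Inspect the per-host results from the ThreadingGroup run
for conn, res in result.items():
    print("%s: exited=%d stdout=%s" % (conn.host, res.exited,
                                       res.stdout.strip()))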