def valid_ssh(hostname, port, username, password):
    """Check SSH access to a host, installing the application key if needed.

    The application key pair is read from ``AppSetting``; when either key is
    missing a new pair is created with ``SSH.generate_key()`` and stored.

    Args:
        hostname: Host to connect to.
        port: SSH port.
        username: Login user.
        password: Login password. When truthy the host is reached with the
            password and the public key is appended to the user's
            ``~/.ssh/authorized_keys``; otherwise the stored private key is
            used to connect.

    Returns:
        True when ``ping()`` succeeded, False when it raised.

    Raises:
        Exception: if installing the public key exits with a non-zero code.
    """
    try:
        private_key = AppSetting.get('private_key')
        public_key = AppSetting.get('public_key')
    except KeyError:
        private_key, public_key = SSH.generate_key()
        AppSetting.set('private_key', private_key, 'ssh private key')
        AppSetting.set('public_key', public_key, 'ssh public key')

    # The password is a secret: it must never be printed or logged.
    if password:
        cli = SSH(hostname, port, username, password=password)
    else:
        cli = SSH(hostname, port, username, private_key)

    try:
        cli.ping()
    except Exception:
        return False

    if password:
        # Password login worked: install the public key on the host.
        code, out = cli.exec_command(
            'mkdir -p -m 700 ~/.ssh && '
            'echo %r >> ~/.ssh/authorized_keys && '
            'chmod 600 ~/.ssh/authorized_keys' % public_key)
        if code != 0:
            raise Exception(f'add public key error: {out!r}')
    return True
def create_ion(tsuite):
    """Create ION file systems.

    For every entry of ``tsuite.sl2objects["ion"]`` an SSH connection is
    opened to the entry's host and the mkdir/slmkfs commands are run through
    ``sl2gen.sl_screen_and_wait``. When an ``SSHException`` is raised the
    failure is logged and ``tsuite.shutdown()`` is called.

    Args:
        tsuite: tsuite runtime.
    """
    for ion in tsuite.sl2objects["ion"]:
        # Create monolithic reference/replace dict; later sources override
        # earlier ones (src_dirs < build_dirs < ion).
        repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
        repl_dict = dict(repl_dict, **ion)

        # Create remote connection to server
        try:
            user, host = tsuite.user, ion["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, '')
            try:
                cmd = """
                mkdir -p {datadir}
                mkdir -p {fsroot}
                {slmkfs} -Wi -u {fsuuid} -I {site_id} {fsroot}"""\
                    .format(**repl_dict)

                sock_name = "ts.ion." + ion["id"]
                sl2gen.sl_screen_and_wait(tsuite, ssh, cmd, sock_name)
                log.info("Finished creating {0}!".format(ion["name"]))
            finally:
                # Release the connection even when the remote step fails.
                ssh.close()
        except SSHException:
            log.fatal("Error with remote connection to {0} with res {1}!"
                      .format(ion["host"], ion["name"]))
            tsuite.shutdown()
def create_ion(tsuite):
    """Create ION file systems.

    For every entry of ``tsuite.sl2objects["ion"]`` an SSH connection is
    opened to the entry's host and the mkdir/slmkfs commands are run through
    ``sl2gen.sl_screen_and_wait``. When an ``SSHException`` is raised the
    failure is logged and ``tsuite.shutdown()`` is called.

    Args:
        tsuite: tsuite runtime.
    """
    for ion in tsuite.sl2objects["ion"]:
        # Create monolithic reference/replace dict; later sources override
        # earlier ones (src_dirs < build_dirs < ion).
        repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
        repl_dict = dict(repl_dict, **ion)

        # Create remote connection to server
        try:
            user, host = tsuite.user, ion["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, '')
            try:
                cmd = """
                mkdir -p {datadir}
                mkdir -p {fsroot}
                {slmkfs} -Wi -u {fsuuid} -I {site_id} {fsroot}"""\
                    .format(**repl_dict)

                sock_name = "ts.ion." + ion["id"]
                sl2gen.sl_screen_and_wait(tsuite, ssh, cmd, sock_name)
                log.info("Finished creating {0}!".format(ion["name"]))
            finally:
                # Release the connection even when the remote step fails.
                ssh.close()
        except SSHException:
            log.fatal("Error with remote connection to {0} with res {1}!"
                      .format(ion["host"], ion["name"]))
            tsuite.shutdown()
def connect(self):
    """Open the SSH connection to the DHCP server unless one is already held.

    The connection object is stored on ``self._conn``; a truthy ``_conn`` is
    treated as an existing connection and left untouched.
    """
    if self._conn:
        return

    self._conn = SSH(self._fqdn)
    self._conn.connect()
def decorator(self, *args, **kwargs):
    """Ensure ``self.conn`` is set, then delegate to the wrapped ``function``.

    When ``self.conn`` is falsy a new ``SSH`` connection to
    ``self.host.fqdn`` is created, connected and stored on ``self.conn``.
    All positional and keyword arguments are forwarded unchanged and the
    wrapped function's result is returned.
    """
    from utils.ssh import SSH

    already_connected = bool(self.conn)
    if not already_connected:
        self.conn = SSH(self.host.fqdn)
        self.conn.connect()

    result = function(self, *args, **kwargs)
    return result
def k8s_apply(self, params, out_func=None):
    """Run the k8s apply command for a file on a remote host.

    Args:
        params: dict with the keys 'filename', 'public_ip', 'username' and
            'passwd'.
        out_func: passed through to ``SSH.exec_rt``.

    Returns:
        The ``(out, err)`` pair returned by ``SSH.exec_rt``.
    """
    cmd = K8S_APPLY_CMD + params['filename']
    ssh = SSH(hostname=params['public_ip'],
              port=22,
              username=params['username'],
              passwd=params['passwd'])
    try:
        out, err = ssh.exec_rt(cmd, out_func)
    finally:
        # Always release the connection, even when the command raises.
        ssh.close()
    return out, err
def create_mds(tsuite):
    """Initialize MDS resources for testing.

    For every entry of ``tsuite.sl2objects["mds"]`` an SSH connection is
    opened to the entry's host and the zfs-fuse/zpool/slmkfs/slmkjrnl command
    sequence is run through ``sl2gen.sl_screen_and_wait``. When an
    ``SSHException`` is raised the failure is logged and
    ``tsuite.shutdown()`` is called.

    Args:
        tsuite: tsuite runtime.
    """
    # Create the MDS systems
    for mds in tsuite.sl2objects["mds"]:
        # Create monolithic reference/replace dict; later sources override
        # earlier ones (src_dirs < build_dirs < mds).
        repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
        repl_dict = dict(repl_dict, **mds)

        # Create remote connection to server
        try:
            # Can probably avoid doing user, host everytime
            user, host = tsuite.user, mds["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, '')
            try:
                cmd = """
                $SHELL -c "cd {src} && make printvar-CC >/dev/null"
                pkill zfs-fuse || true
                $SHELL -c "{zfs_fuse} &"
                sleep 2
                {zpool} destroy {zpool_name} || true
                sleep 2
                {zpool} create -m {zpool_path} -f {zpool_name} {zpool_args}
                sleep 2
                {zpool} set cachefile={zpool_cache} {zpool_name}
                sleep 2
                {slmkfs} -u {fsuuid} -I {site_id} {zpool_path}
                sleep 2
                sync
                umount {zpool_path}
                pkill zfs-fuse || true
                sleep 2
                $SHELL -c "{zfs_fuse} &"
                sleep 2
                {zpool} import {zpool_name} || true
                sleep 2
                pkill zfs-fuse || true
                sleep 2
                mkdir -p {datadir}
                {slmkjrnl} -D {datadir} -b {jrnldev} -f -u {fsuuid}""".format(
                    **repl_dict)

                screen_name = "ts.mds." + mds["id"]
                sl2gen.sl_screen_and_wait(tsuite, ssh, cmd, screen_name)
                log.info("Finished creating {0}".format(mds["name"]))
            finally:
                # Release the connection even when the remote step fails.
                ssh.close()
        except SSHException:
            log.fatal("Error with remote connection to {0} with res {1}!"
                      .format(mds["host"], mds["name"]))
            tsuite.shutdown()
def create_mds(tsuite):
    """Initialize MDS resources for testing.

    For every entry of ``tsuite.sl2objects["mds"]`` an SSH connection is
    opened to the entry's host and the zfs-fuse/zpool/slmkfs/slmkjrnl command
    sequence is run through ``sl2gen.sl_screen_and_wait``. When an
    ``SSHException`` is raised the failure is logged and
    ``tsuite.shutdown()`` is called.

    Args:
        tsuite: tsuite runtime.
    """
    # Create the MDS systems
    for mds in tsuite.sl2objects["mds"]:
        # Create monolithic reference/replace dict; later sources override
        # earlier ones (src_dirs < build_dirs < mds).
        repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
        repl_dict = dict(repl_dict, **mds)

        # Create remote connection to server
        try:
            # Can probably avoid doing user, host everytime
            user, host = tsuite.user, mds["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, '')
            try:
                cmd = """
                $SHELL -c "cd {src} && make printvar-CC >/dev/null"
                pkill zfs-fuse || true
                $SHELL -c "{zfs_fuse} &"
                sleep 2
                {zpool} destroy {zpool_name} || true
                sleep 2
                {zpool} create -m {zpool_path} -f {zpool_name} {zpool_args}
                sleep 2
                {zpool} set cachefile={zpool_cache} {zpool_name}
                sleep 2
                {slmkfs} -u {fsuuid} -I {site_id} {zpool_path}
                sleep 2
                sync
                umount {zpool_path}
                pkill zfs-fuse || true
                sleep 2
                $SHELL -c "{zfs_fuse} &"
                sleep 2
                {zpool} import {zpool_name} || true
                sleep 2
                pkill zfs-fuse || true
                sleep 2
                mkdir -p {datadir}
                {slmkjrnl} -D {datadir} -b {jrnldev} -f -u {fsuuid}""".format(
                    **repl_dict)

                screen_name = "ts.mds." + mds["id"]
                sl2gen.sl_screen_and_wait(tsuite, ssh, cmd, screen_name)
                log.info("Finished creating {0}".format(mds["name"]))
            finally:
                # Release the connection even when the remote step fails.
                ssh.close()
        except SSHException:
            log.fatal("Error with remote connection to {0} with res {1}!"
                      .format(mds["host"], mds["name"]))
            tsuite.shutdown()
def create_image(self, params, out_func=None):
    """Run the image-creation command on a remote host.

    The command is ``CREATE_IMAGE_CMD`` followed by the space-separated
    values of 'app_name', 'image_name', 'repos_https_url', 'branch_name' and
    'version' from ``params``.

    Args:
        params: dict with the five keys above plus 'public_ip', 'username'
            and 'passwd' for the connection.
        out_func: passed through to ``SSH.exec_rt``.

    Returns:
        The ``(out, err)`` pair returned by ``SSH.exec_rt``.
    """
    cmd = CREATE_IMAGE_CMD + ' '.join([
        params['app_name'],
        params['image_name'],
        params['repos_https_url'],
        params['branch_name'],
        params['version'],
    ])
    ssh = SSH(hostname=params['public_ip'],
              port=22,
              username=params['username'],
              passwd=params['passwd'])
    try:
        out, err = ssh.exec_rt(cmd, out_func)
    finally:
        # Always release the connection, even when the command raises.
        ssh.close()
    return out, err
def check_status(self):
    """Generate general status report for all sl2 objects.

    Every object in ``self.sl2objects`` is contacted over SSH and each status
    command is run with a 2 second timeout.

    Returns:
        {
          "type": [ {"host": ..., "id": ..., "reports": {op: result} } ],
          ...
        }
    """
    report = {}

    # Operations based on type
    # NOTE(review): the {slmctl}/{slictl} placeholders are passed to
    # ssh.run() without being formatted here -- confirm whether ssh.run()
    # substitutes them or whether they should be filled in first.
    ops = {
        "all": {
            "load": "cat /proc/loadavg | cut -d' ' -f1,2,3",
            "mem_total": "cat /proc/meminfo | head -n1",
            "mem_free": "sed -n 2,2p /proc/meminfo",
            # `head` has no -d/-f options; field extraction is done by `cut`.
            "uptime": "cat /proc/uptime | cut -d' ' -f1",
            "disk_stats": "df -hl"
        },
        "mds": {
            "connections": "{slmctl} -sconnections",
            "iostats": "{slmctl} -siostats"
        },
        "ion": {
            "connections": "{slictl} -sconnections",
            "iostats": "{slictl} -siostats"
        }
    }

    for sl2_restype in self.sl2objects:
        report[sl2_restype] = []

        # Common operations plus the ones specific to this resource type.
        obj_ops = dict(ops["all"])
        if sl2_restype in ops:
            obj_ops.update(ops[sl2_restype])

        for sl2_obj in self.sl2objects[sl2_restype]:
            obj_report = {
                "host": sl2_obj["host"],
                "id": sl2_obj["id"],
                "reports": {}
            }
            user, host = self.user, sl2_obj["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, '')
            try:
                for op, cmd in obj_ops.items():
                    obj_report["reports"][op] = ssh.run(cmd, timeout=2)
                report[sl2_restype].append(obj_report)
                log.debug("Status check completed for {0} [{1}]".format(
                    host, sl2_restype))
            finally:
                ssh.close()
    return report
def k8s_delete(self, params, out_func=None):
    """Run the k8s delete command for one object on a remote host.

    Args:
        params: dict; 'obj_type' and 'obj_name' select the object, and
            'public_ip', 'username' and 'passwd' describe the connection.
        out_func: passed through to ``SSH.exec_rt``.

    Returns:
        ``('', '')`` without connecting when 'obj_type' or 'obj_name' is
        missing or empty; otherwise the ``(out, err)`` pair returned by
        ``SSH.exec_rt``.
    """
    if not (params.get('obj_type') and params.get('obj_name')):
        return '', ''

    cmd = K8S_DELETE_CMD + ' '.join(
        [params['obj_type'], params['obj_name']])
    ssh = SSH(hostname=params['public_ip'],
              port=22,
              username=params['username'],
              passwd=params['passwd'])
    try:
        out, err = ssh.exec_rt(cmd, out_func)
    finally:
        # Always release the connection, even when the command raises.
        ssh.close()
    return out, err
def kill_mnt(tsuite):
    """Unmount the client mountpoints and stop the client daemons.

    Runs ``sudo umount`` on ``tsuite.build_dirs["mp"]`` on every client host
    (logging a critical message when the command reports anything on its
    error output), then hands the clients to ``sl2gen.stop_slash2_socks``.

    Args:
        tsuite: runtime tsuite.
    """
    for client in tsuite.sl2objects["client"]:
        ssh = SSH(tsuite.user, client["host"])
        try:
            result = ssh.run(
                "sudo umount {0}".format(tsuite.build_dirs["mp"]))
            if not result["err"] == []:
                log.critical(
                    "Cannot unmount client mountpoint at {0} @ {1}.".format(
                        tsuite.build_dirs["mp"], client["host"]))
        finally:
            # The connection is only needed for the umount.
            ssh.close()

    sl2gen.stop_slash2_socks(tsuite, "client", tsuite.sl2objects["client"],
                             "msctl", "mount_slash")
def get_captcha_from_k8s_log(self):
    """Read the captcha from the oauth2 pod's Kubernetes log over SSH.

    The SSH settings come from the "ssh" section of ``ssh_config.yml`` in
    ``SharePath.CONFIG_FOLDER``; the hostname is replaced by the host of the
    current run environment.

    Returns:
        The captcha text with trailing whitespace removed.
    """
    config_file = r'{}\ssh_config.yml'.format(SharePath.CONFIG_FOLDER)
    connection_settings = HandleConfig(config_file).config.get("ssh")
    # Use the IP of the environment the run is currently targeting.
    connection_settings['hostname'] = f'{get_run_env()["host"]}'
    client = SSH(**connection_settings)

    pod_query = "kubectl get pods | grep oauth2 | awk '{print $1}'"
    pod_name = client.execute_command(pod_query)

    log_query = f"kubectl logs {pod_name.rstrip()} --tail 2 | grep 生成验证码存入redis | awk -F ' ' '{{print $5}}'"
    raw_captcha = client.execute_command(log_query)
    return raw_captcha.rstrip()
def get_captcha_from_k8s_log():
    """Read the captcha from the oauth2 pod's Kubernetes log over SSH.

    The SSH settings come from the "ssh" section of
    ``LoginPage.SSH_CONFIG``; the hostname is replaced by ``sb_config.host``.

    Returns:
        The captcha string with trailing whitespace removed.
    """
    connection_settings = LoginPage.SSH_CONFIG.get("ssh")
    # NOTE(review): this writes the hostname into the dict held by
    # LoginPage.SSH_CONFIG itself -- confirm that sharing is intended.
    connection_settings['hostname'] = sb_config.host
    client = SSH(**connection_settings)

    pod_query = "kubectl get pods | grep oauth2 | awk '{print $1}'"
    pod_name = client.execute_command(pod_query)

    log_query = f"kubectl logs {pod_name.rstrip()} --tail 2 | grep 生成验证码存入redis | awk -F ' ' '{{print $5}}'"
    raw_captcha = client.execute_command(log_query)
    return raw_captcha.rstrip()
def execute(self, args):
    """Copy the env file to the host named by ``args.source`` and run ``w``.

    The host entry is looked up in ``/etc/dtux/env.yml`` under
    ``args.source``; its 'ipAddress', 'user' and 'password' values build the
    connection. The env file is uploaded to ``/root/diego.txt``, then the
    output of ``w`` is printed and logged as a warning.
    """
    env_path = "/etc/dtux/env.yml"
    host_entry = self.read_configuration(env_path)[args.source]

    sshobj = sshObject(
        host_entry['ipAddress'],
        host_entry['user'],
        host_entry['password'],
    )
    scp = SSH(sshobj)
    scp.scp_put(sshobj, env_path, "/root/diego.txt")

    who_output = scp.run_command("w")
    print(who_output)
    logger.warning("Some message: %s", who_output)
def check_status(self):
    """Generate general status report for all sl2 objects.

    Every object in ``self.sl2objects`` is contacted over SSH and each status
    command is run with a 2 second timeout.

    Returns:
        {
          "type": [ {"host": ..., "id": ..., "reports": {op: result} } ],
          ...
        }
    """
    report = {}

    # Operations based on type
    # NOTE(review): the {slmctl}/{slictl} placeholders are passed to
    # ssh.run() without being formatted here -- confirm whether ssh.run()
    # substitutes them or whether they should be filled in first.
    ops = {
        "all": {
            "load": "cat /proc/loadavg | cut -d' ' -f1,2,3",
            "mem_total": "cat /proc/meminfo | head -n1",
            "mem_free": "sed -n 2,2p /proc/meminfo",
            # `head` has no -d/-f options; field extraction is done by `cut`.
            "uptime": "cat /proc/uptime | cut -d' ' -f1",
            "disk_stats": "df -hl",
        },
        "mds": {"connections": "{slmctl} -sconnections", "iostats": "{slmctl} -siostats"},
        "ion": {"connections": "{slictl} -sconnections", "iostats": "{slictl} -siostats"},
    }

    for sl2_restype in self.sl2objects:
        report[sl2_restype] = []

        # Common operations plus the ones specific to this resource type.
        obj_ops = dict(ops["all"])
        if sl2_restype in ops:
            obj_ops.update(ops[sl2_restype])

        for sl2_obj in self.sl2objects[sl2_restype]:
            obj_report = {"host": sl2_obj["host"], "id": sl2_obj["id"], "reports": {}}
            user, host = self.user, sl2_obj["host"]
            log.debug("Connecting to {0}@{1}".format(user, host))
            ssh = SSH(user, host, "")
            try:
                for op, cmd in obj_ops.items():
                    obj_report["reports"][op] = ssh.run(cmd, timeout=2)
                report[sl2_restype].append(obj_report)
                log.debug("Status check completed for {0} [{1}]".format(host, sl2_restype))
            finally:
                ssh.close()
    return report
def create_remote_setups(self):
    """Create the necessary build directories on all slash2 objects.

    For each object returned by ``self.all_objects()`` every path in
    ``self.build_dirs`` is created on the object's host and made
    world-accessible (``chmod -R 777``). An ``SSHException`` is logged and
    the remaining objects are still processed.
    """
    for sl2_obj in self.all_objects():
        try:
            ssh = SSH(self.user, sl2_obj["host"], '')
            try:
                log.debug("Creating build directories on {0}@{1}".format(
                    sl2_obj["name"], sl2_obj["host"]))
                for d in self.build_dirs.values():
                    ssh.make_dirs(d)
                    ssh.run("sudo chmod -R 777 \"{0}\"".format(d), quiet=True)
            finally:
                # Release the connection even when a remote step fails.
                ssh.close()
        except SSHException:
            log.error(
                "Unable to connect to {0} to create build directories!".
                format(sl2_obj["host"]))
def get_installations(fqdn):
    """Collect the installations reported by a machine over SSH.

    Runs ``machine_get_installations.sh`` remotely and parses its output: a
    line starting with ``--`` opens a new ``Installation`` and the following
    ``ARCH=``, ``KERNEL=``, ``RUNNING=``, ``DIST=`` and ``PART=`` lines fill
    its fields. The connection is closed by a timer after five minutes if it
    is still open.

    Returns:
        A list of ``Installation`` objects, or False when the machine is
        unknown or any exception occurred.
    """
    try:
        machine = Machine.objects.get(fqdn=fqdn)
    except Machine.DoesNotExist:
        logger.warning("Machine '{}' does not exist".format(fqdn))
        return False

    # Line prefix -> Installation attribute receiving the text after '='.
    text_fields = (
        ('ARCH=', 'architecture'),
        ('KERNEL=', 'kernelversion'),
        ('DIST=', 'distribution'),
        ('PART=', 'partition'),
    )

    conn = None
    timer = None
    try:
        conn = SSH(fqdn)
        conn.connect()
        timer = threading.Timer(5 * 60, conn.close)
        timer.start()

        # Installations
        logger.debug("Collect installations...")
        installations = []
        output, stderr, exitstatus = conn.execute_script_remote(
            'machine_get_installations.sh')

        for line in output or ():
            if line.startswith('--'):
                installation = Installation(machine=machine)
                installations.append(installation)
                continue
            if line.startswith('RUNNING='):
                installation.active = line.startswith('RUNNING=1')
                continue
            for prefix, attribute in text_fields:
                if line.startswith(prefix):
                    value = line.split('=')[1].strip()
                    setattr(installation, attribute, value)
                    break

        return installations
    except Exception as e:
        logger.error("{} ({})".format(fqdn, e))
        return False
    finally:
        if conn:
            conn.close()
        if timer:
            timer.cancel()
def create_remote_setups(self):
    """Create the necessary build directories on all slash2 objects.

    For each object returned by ``self.all_objects()`` every path in
    ``self.build_dirs`` is created on the object's host and made
    world-accessible (``chmod -R 777``). An ``SSHException`` is logged and
    the remaining objects are still processed.
    """
    for sl2_obj in self.all_objects():
        try:
            ssh = SSH(self.user, sl2_obj["host"], "")
            try:
                log.debug("Creating build directories on {0}@{1}".format(sl2_obj["name"], sl2_obj["host"]))
                for d in self.build_dirs.values():
                    ssh.make_dirs(d)
                    ssh.run('sudo chmod -R 777 "{0}"'.format(d), quiet=True)
            finally:
                # Release the connection even when a remote step fails.
                ssh.close()
        except SSHException:
            log.error("Unable to connect to {0} to create build directories!".format(sl2_obj["host"]))
def execute(self):
    """Run the setup command on the machine's TFTP server, then reboot it.

    Does nothing (beyond logging) when the server setting
    'orthos.debug.setup.execute' is false, when the machine's domain has no
    TFTP server, or when the rendered command exits with a non-zero status.
    The command comes from the 'setup.execute.command' setting, rendered as
    a template with ``machine`` and ``choice`` in its context. All
    exceptions are logged and not re-raised.
    """
    if not ServerConfig.objects.bool_by_key('orthos.debug.setup.execute'):
        logger.warning(
            "Disabled: set 'orthos.debug.setup.execute' to 'true'")
        return

    logger.debug('Calling setup script...')

    try:
        machine = Machine.objects.get(fqdn=self.fqdn)
        tftp_server = machine.fqdn_domain.tftp_server

        if not tftp_server:
            logger.warning("No TFTP server available for '{}'".format(
                machine.fqdn_domain.name))
            return

        command_template = ServerConfig.objects.by_key(
            'setup.execute.command')

        context = Context({'machine': machine, 'choice': self.choice})
        command = Template(command_template).render(context)

        logger.debug("Initialize setup {}@{}: {}:{}".format(
            self.choice, machine.fqdn, tftp_server.fqdn, command))

        conn = SSH(tftp_server.fqdn)
        try:
            conn.connect()
            stdout, stderr, exitstatus = conn.execute(command)
        finally:
            # Release the connection even when connect/execute raises.
            conn.close()

        if exitstatus != 0:
            logger.warning(
                "Creating setup configuration failed for '{}'".format(
                    machine))
            return

        # reboot machine finally
        machine.reboot()

    except SSH.Exception as exception:
        logger.error(exception)
    except Machine.DoesNotExist:
        logger.error("Machine does not exist: fqdn={}".format(self.fqdn))
    except Exception as e:
        logger.exception(e)
def stop_slash2_socks(tsuite, sock_name, sl2objects, ctl_type, daemon_type):
    """Terminates all slash2 socks and screen socks on a generic host.

    For each object the daemon is asked to stop through its control socket
    and the screen session named ``sl.<sock_name>.<id>`` is killed. An
    object without an "id" key gets ``id`` 0 assigned.

    Args:
        tsuite: tsuite runtime.
        sock_name: name of sl2 sock.
        sl2objects: list of objects to be stopped.
        ctl_type: key to ctl path in src_dirs
        daemon_type: key to daemon path in src_dirs

    Raises:
        AssertionError: if ctl_type or daemon_type is not in tsuite.src_dirs.
    """
    assert (ctl_type in tsuite.src_dirs)
    assert (daemon_type in tsuite.src_dirs)

    for sl2object in sl2objects:
        log.info("Killing {0} @ {1}".format(sl2object["name"],
                                            sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        try:
            ssh = SSH(user, host, '')
        except Exception:
            log.error("Unable to connect to {0}@{1}".format(user, host))
            # NOTE(review): this gives up on all remaining objects as well;
            # confirm that is intended rather than skipping just this one.
            return

        try:
            cmd = "{0} -S {1}/{2}.{3}.sock stop".format(
                tsuite.src_dirs[ctl_type], tsuite.build_dirs["ctl"],
                daemon_type, host)
            ssh.run(cmd)

            if "id" not in sl2object:
                sl2object["id"] = 0

            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])
            ssh.kill_screens(screen_sock_name, exact_sock=True)
        finally:
            # Release the connection even when a remote step fails.
            ssh.close()
def login_test(fqdn):
    """Report whether an SSH login to ``fqdn`` works.

    Returns:
        True when connecting raised nothing, False otherwise (the failure is
        logged as a warning). The connection is closed in either case.
    """
    conn = None
    try:
        conn = SSH(fqdn)
        conn.connect()
    except Exception as error:
        logger.warning("SSH login failed for '{}': {}".format(fqdn, error))
        reachable = False
    else:
        reachable = True
    finally:
        if conn:
            conn.close()
    return reachable
def abuild_test(fqdn):
    """Report whether an autobuild process is running on ``fqdn``.

    The process list is fetched over SSH and filtered for
    ``/usr/sbin/autobuild``.

    Returns:
        True when at least one matching PID line came back, False when none
        did or when any exception occurred (logged as a warning).
    """
    conn = None
    try:
        conn = SSH(fqdn)
        conn.connect()
        pids, stderr, exitstatus = conn.execute(
            r"ps -e -o pid,cmd | awk '/.*\/usr\/sbin\/autobuild.*/{print $1}'")
        return len(pids) > 0
    except Exception as error:
        logger.warning("SSH login failed for '{}': {}".format(fqdn, error))
        return False
    finally:
        if conn:
            conn.close()
def ssh_shutdown(self, user=None, reboot=False):
    """Power off or reboot the machine using SSH.

    Args:
        user: not used by this method.
        reboot: when true run ``shutdown --reboot now``, otherwise
            ``shutdown --poweroff now``.

    Returns:
        True when the shutdown command exited with status 0, else False.
    """
    from utils.ssh import SSH

    option = '--reboot' if reboot else '--poweroff'
    command = 'shutdown {} now'.format(option)

    machine = SSH(self.fqdn)
    try:
        machine.connect()
        stdout, stderr, exitstatus = machine.execute(command, retry=False)
    finally:
        # Release the connection even when connect/execute raises.
        machine.close()

    return exitstatus == 0
def remote_ssh(self, params):
    """Run a command on a remote host over SSH.

    :param params: dict; required keys {'public_ip', 'username', 'passwd',
        'cmd'}, optional keys {'rt' (stream the output in real time),
        'out_func'}
    :return: the ``(out, err)`` pair from the SSH call, or
        ``([], [str(error)])`` when any exception is raised
    """
    try:
        ssh = SSH(hostname=params['public_ip'],
                  username=params['username'],
                  passwd=params['passwd'])
        try:
            if params.get('rt'):
                out, err = ssh.exec_rt(params['cmd'], params.get('out_func'))
            else:
                out, err = ssh.exec(params['cmd'])
        finally:
            # Release the connection even when the command raises.
            ssh.close()
        return out, err
    except Exception as e:
        return [], [str(e)]
def stop_slash2_socks(tsuite, sock_name, sl2objects, ctl_type, daemon_type):
    """Terminates all slash2 socks and screen socks on a generic host.

    For each object the daemon is asked to stop through its control socket
    and the screen session named ``sl.<sock_name>.<id>`` is killed. An
    object without an "id" key gets ``id`` 0 assigned.

    Args:
        tsuite: tsuite runtime.
        sock_name: name of sl2 sock.
        sl2objects: list of objects to be stopped.
        ctl_type: key to ctl path in src_dirs
        daemon_type: key to daemon path in src_dirs

    Raises:
        AssertionError: if ctl_type or daemon_type is not in tsuite.src_dirs.
    """
    assert ctl_type in tsuite.src_dirs
    assert daemon_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.info("Killing {0} @ {1}".format(sl2object["name"], sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        try:
            ssh = SSH(user, host, "")
        except Exception:
            log.error("Unable to connect to {0}@{1}".format(user, host))
            # NOTE(review): this gives up on all remaining objects as well;
            # confirm that is intended rather than skipping just this one.
            return

        try:
            cmd = "{0} -S {1}/{2}.{3}.sock stop".format(
                tsuite.src_dirs[ctl_type], tsuite.build_dirs["ctl"], daemon_type, host
            )
            ssh.run(cmd)

            if "id" not in sl2object:
                sl2object["id"] = 0

            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])
            ssh.kill_screens(screen_sock_name, exact_sock=True)
        finally:
            # Release the connection even when a remote step fails.
            ssh.close()
def parse_slash2_conf(self):
    """Reads and parses slash2 conf for tokens.

    Every line of the conf file named by ``self.conf["slash2"]["conf"]`` is
    passed through ``repl(self.build_dirs, line)`` and collected into a new
    conf, while the regexes below pick out site, resource and client
    definitions, which are registered in ``self.sl2objects``. The new conf is
    written to ``<build_dirs["base"]>/slash.conf`` and copied to the same
    path on every object returned by ``self.all_objects()``.

    Returns:
        False when the conf file cannot be read or the new conf cannot be
        written; None otherwise.
    """
    try:
        with open(self.conf["slash2"]["conf"]) as conf:
            new_conf = "#TSuite Slash2 Conf\n"

            res, site_name = None, None
            in_site = False
            site_id, fsuuid = -1, -1

            # Regex config parsing for sl2objects
            reg = {
                "clients": re.compile(r"^\s*?#\s*clients\s*=\s*(.+?)\s*;\s*$"),
                "type": re.compile(r"^\s*?type\s*?=\s*?(\S+?)\s*?;\s*$"),
                "id": re.compile(r"^\s*id\s*=\s*(\d+)\s*;\s*$"),
                "zpool": re.compile(r"^\s*?#\s*?zfspool\s*?=\s*?(\w+?)\s+?(.*?)\s*$"),
                "zpool_path": re.compile(r"^\s*?#\s*?zfspath\s*?=\s*?(.+?)\s*$"),
                "prefmds": re.compile(r"\s*?#\s*?prefmds\s*?=\s*?(\w+?@\w+?)\s*$"),
                "fsuuid": re.compile(r"^\s*set\s*fsuuid\s*=\s*\"?(0x[a-fA-F\d]+|\d+)\"?\s*;\s*$"),
                "fsroot": re.compile(r"^\s*?fsroot\s*?=\s*?(\S+?)\s*?;\s*$"),
                "nids": re.compile(r"^\s*?nids\s*?=\s*?(.*)$"),
                "new_res": re.compile(r"^\s*resource\s+(\w+)\s*{\s*$"),
                "fin_res": re.compile(r"^\s*?}\s*$"),
                "site": re.compile(r"^\s*?site\s*?@(\w+).*?"),
                "site_id": re.compile(r"^\s*site_id\s*=\s*(0x[a-fA-F\d]+|\d+)\s*;\s*$"),
                "jrnldev": re.compile(r"^\s*jrnldev\s*=\s*([/\w]+)\s*;\s*$"),
            }

            line = conf.readline()

            while line:
                # Replace keywords and append to new conf
                new_conf += repl(self.build_dirs, line)

                # Iterate through the regexes and return a tuple of
                # (name, [\1, \2, \3, ...]) for successful matches
                matches = [(k, reg[k].match(line).groups())
                           for k in reg if reg[k].match(line)]

                # Should not be possible to have more than one
                assert len(matches) <= 1

                if matches:
                    (name, groups) = matches[0]

                    if in_site:
                        if name == "site_id":
                            site_id = groups[0]

                        elif res:
                            # Inside a resource block: record its settings.
                            if name == "type":
                                res["type"] = groups[0]

                            elif name == "id":
                                res["id"] = groups[0]

                            elif name == "zpool_path":
                                res["zpool_path"] = groups[0].strip()

                            elif name == "jrnldev":
                                res["jrnldev"] = groups[0]

                            elif name == "zpool":
                                res["zpool_name"] = groups[0]
                                res["zpool_cache"] = path.join(
                                    self.build_dirs["base"],
                                    "{0}.zcf".format(groups[0]))
                                res["zpool_args"] = groups[1]

                            elif name == "prefmds":
                                res["prefmds"] = groups[0]

                            elif name == "fsroot":
                                res["fsroot"] = groups[0].strip('"')

                            elif name == "nids":
                                # Read subsequent lines and get the first host
                                tmp = groups[0]
                                while line and ";" not in line:
                                    tmp += line
                                    line = conf.readline()
                                tmp = re.sub(r";\s*$", "", tmp)
                                res["host"] = re.split(r"\s*,\s*", tmp, 1)[0].strip(" ")

                            elif name == "fin_res":
                                # Check for errors finalizing object
                                res["site_id"] = site_id
                                res["fsuuid"] = fsuuid
                                if not res.finalize(self.sl2objects):
                                    self.shutdown()
                                res = None
                        else:
                            if name == "new_res":
                                res = SL2Res(groups[0], site_name)
                    else:
                        if name == "clients":
                            for client in [g.strip() for g in groups[0].split(",")]:
                                client_res = SL2Res(client, None)
                                client_res["type"] = "client"
                                client_res["host"] = client
                                client_res.finalize(self.sl2objects)
                        elif name == "site":
                            site_name = groups[0]
                            in_site = True
                        elif name == "fsuuid":
                            fsuuid = groups[0]

                line = conf.readline()

            new_conf_path = path.join(self.build_dirs["base"], "slash.conf")

            try:
                # The file is closed (and therefore flushed) before it is
                # copied to the remote hosts.
                with open(new_conf_path, "w") as new_conf_file:
                    new_conf_file.write(new_conf)
                log.debug("Successfully wrote build slash2 conf at {0}".format(new_conf_path))

                for sl2_obj in self.all_objects():
                    try:
                        ssh = SSH(self.user, sl2_obj["host"], "")
                        try:
                            log.debug("Copying new config to {0}".format(sl2_obj["host"]))
                            try:
                                ssh.copy_file(new_conf_path, new_conf_path)
                            except IOError:
                                log.critical("Error copying config file to {0}".format(ssh.host))
                                self.shutdown()
                        finally:
                            ssh.close()
                    except SSHException:
                        log.error("Unable to copy config file to {0}!".format(sl2_obj["host"]))
            except IOError as e:
                log.fatal("Unable to write new conf to build directory!")
                log.fatal(new_conf_path)
                log.fatal(e)
                return False
    except IOError as e:
        log.fatal("Unable to read conf file at {0}".format(self.conf["slash2"]["conf"]))
        log.fatal(e)
        return False
def launch_gdb_sl(tsuite, sock_name, sl2objects, res_bin_type, gdbcmd_path):
    """Generic slash2 launch service in screen+gdb.

    Will also copy over authbuf keys. For each object a gdb command file is
    rendered from the template at ``gdbcmd_path``, copied to the host, and
    the binary is started under ``sudo gdb`` inside a screen session named
    ``sl.<sock_name>.<id>``. The function then waits for a control socket
    containing ``res_bin_type`` to appear and records the process id in
    ``sl2object["pid"]``. An object without an "id" key gets ``id`` 0.

    Args:
        tsuite: tsuite runtime.
        sock_name: name of sl2 sock.
        sl2objects: list of objects to be launched.
        res_bin_type: key to bin path in src_dirs.
        gdbcmd_path: path to gdbcmd file.

    Raises:
        AssertionError: if res_bin_type is not a key of tsuite.src_dirs.
    """
    # res_bin_type NEEDS to be a path in src_dirs
    assert res_bin_type in tsuite.src_dirs

    for sl2object in sl2objects:
        log.debug("Initializing environment > {0} @ {1}".format(sl2object["name"], sl2object["host"]))

        # Remote connection
        user, host = tsuite.user, sl2object["host"]
        log.debug("Connecting to {0}@{1}".format(user, host))
        ssh = SSH(user, host, "")

        try:
            # Acquire and deploy authbuf key
            need_authbuf = handle_authbuf(tsuite, ssh, sl2object["type"])

            ls_cmd = "ls {0}/".format(tsuite.build_dirs["ctl"])
            ssh.run(ls_cmd)

            # Create monolithic reference/replace dict
            repl_dict = dict(tsuite.src_dirs, **tsuite.build_dirs)
            repl_dict = dict(repl_dict, **sl2object)

            # NOTE(review): repl_dict was built before this default is set,
            # so it has no "id" for such objects -- confirm that is intended.
            if "id" not in sl2object:
                sl2object["id"] = 0

            # Create gdbcmd from template
            gdbcmd_build_path = path.join(
                tsuite.build_dirs["base"],
                "{0}_{1}".format(sl2object["id"], path.basename(gdbcmd_path))
            )

            new_gdbcmd = repl_file(repl_dict, gdbcmd_path)

            if new_gdbcmd:
                with open(gdbcmd_build_path, "w") as f:
                    f.write(new_gdbcmd)
                log.debug("Wrote gdb cmd to {0}".format(gdbcmd_build_path))
                log.debug("Remote copying gdbcmd.")
                ssh.copy_file(gdbcmd_build_path, gdbcmd_build_path)
            else:
                log.fatal("Unable to parse gdb cmd at {0}!".format(gdbcmd_path))
                tsuite.shutdown()

            cmd = "sudo gdb -f -x {0} {1}".format(gdbcmd_build_path, tsuite.src_dirs[res_bin_type])
            screen_sock_name = "sl.{0}.{1}".format(sock_name, sl2object["id"])

            # Launch slash2 in gdb within a screen session
            ssh.run_screen(cmd, screen_sock_name)

            # Wait two seconds to make sure slash2 launched without errors
            time.sleep(2)

            screen_socks = ssh.list_screen_socks()
            if screen_sock_name + "-error" in screen_socks or screen_sock_name not in screen_socks:
                log.fatal(
                    "sl2object {0}:{1} launched with an error. Resume to {2} and resolve it.".format(
                        sl2object["name"], sl2object["id"], screen_sock_name + "-error"
                    )
                )
                tsuite.shutdown(ignore=sock_name)

            log.debug("Waiting for {0} sock on {1} to appear.".format(sock_name, host))
            count = 0
            while True:
                result = ssh.run(ls_cmd, quiet=True)
                if any(res_bin_type in sock for sock in result["out"]):
                    break
                time.sleep(1)
                count += 1
                if count == int(tsuite.conf["slash2"]["timeout"]):
                    log.fatal(
                        "Cannot find {0} sock on {1}. Resume to {2} and resolve it. ".format(
                            res_bin_type, sl2object["id"], screen_sock_name
                        )
                    )
                    tsuite.shutdown(ignore=sock_name)

            # grab pid for resouce querying later
            # TODO: do not grab other running instances
            sl2object["pid"] = ssh.run("pgrep {0}".format(res_bin_type))["out"][0].strip()
            log.debug("Found {0} pid on {1} : {2}".format(res_bin_type, host, sl2object["pid"]))

            if need_authbuf:
                pull_authbuf(tsuite, ssh)
        finally:
            # Release the connection even when a step above fails.
            ssh.close()
def get_hardware_information(fqdn):
    """
    Retrieves information of the system.

    Connects to ``fqdn`` over SSH, runs a series of helper scripts and
    commands, and stores the results on a fresh, unsaved ``Machine`` object
    (only ``architecture`` is copied from the stored machine). The
    connection is closed by a timer after five minutes if still open.

    Returns the populated ``Machine`` object, ``None`` when no machine with
    this FQDN exists, or ``False`` when any exception occurred (it is
    logged).
    """
    try:
        machine = Machine.objects.get(fqdn=fqdn)
    except Machine.DoesNotExist:
        logger.warning("Machine '{}' does not exist".format(fqdn))
        return

    # set needed values for several checks from original machine
    machine_ = Machine(architecture=machine.architecture)

    conn = None
    timer = None
    try:
        conn = SSH(fqdn)
        conn.connect()
        # Safety net: force-close the connection after five minutes.
        timer = threading.Timer(5 * 60, conn.close)
        timer.start()

        # CPUs
        logger.debug("Get CPU number...")
        output, stderr, exitstatus = conn.execute_script_remote(
            'machine_get_cpu_number.sh')

        if output:
            # Lines have the form KEY=<integer>.
            for line in output:
                if line.startswith('SOCKETS'):
                    machine_.cpu_physical = int(line.split('=')[1])
                elif line.startswith('CORES'):
                    machine_.cpu_cores = int(line.split('=')[1])
                elif line.startswith('THREADS'):
                    machine_.cpu_threads = int(line.split('=')[1])

        logger.debug("Get CPU type...")
        output, stderr, exitstatus = conn.execute_script_remote(
            'machine_get_cpu_type.sh')
        if output and output[0]:
            machine_.cpu_model = output[0].strip()

        logger.debug("Get CPU flags...")
        output, stderr, exitstatus = conn.execute_script_remote(
            'machine_get_cpu_flags.sh')
        if output and output[0]:
            machine_.cpu_flags = output[0].strip()

        logger.debug("Get CPU speed...")
        output, stderr, exitstatus = conn.execute_script_remote(
            'machine_get_cpu_speed.sh')
        if output and output[0]:
            # NOTE(review): divides the reported value by 1e6 -- presumably
            # Hz to MHz; confirm against the script's output unit.
            machine_.cpu_speed = Decimal(int(output[0].strip()) / 1000000)

        logger.debug("Get CPU ID...")
        output, stderr, exitstatus = conn.execute_script_remote(
            'machine_get_cpu_id.sh')
        if output and output[0]:
            machine_.cpu_id = output[0].strip()

        # EFI
        logger.debug("Check for EFI...")
        try:
            # Being able to open /sys/firmware/efi is taken as "EFI present".
            efi_file = conn.get_file('/sys/firmware/efi', 'r')
            efi_file.close()
            machine_.efi = True
        except IOError:
            machine_.efi = False

        # Memory
        logger.debug("Get RAM amount...")
        for line in conn.read_file('/proc/meminfo'):
            if line.startswith('MemTotal'):
                # Second field divided by 1024 (kB to MB for /proc/meminfo).
                machine_.ram_amount = int(int(line.split()[1]) / 1024)

        # Virtualization capability
        VM_HOST_MIN_RAM_MB = 7000
        machine_.vm_capable = False

        # Virtualization: x86
        logger.debug("Check for VM capability...")
        if machine_.architecture_id == Architecture.Type.X86_64:
            cpu_flags = machine_.cpu_flags
            if cpu_flags:
                cpu_flags = cpu_flags.upper()
                # Needs a VMX or SVM CPU flag and more than the minimum RAM.
                if ((cpu_flags.find('VMX') >= 0 or cpu_flags.find('SVM') >= 0)
                        and int(machine_.ram_amount) > VM_HOST_MIN_RAM_MB):
                    machine_.vm_capable = True

        # Virtualization: ppc64le
        if machine_.architecture_id == Architecture.Type.PPC64LE:
            for line in conn.read_file('/proc/cpuinfo'):
                if line.startswith('firmware') and 'OPAL' in line:
                    machine_.vm_capable = True

        # Disk
        logger.debug("Get disk information...")
        stdout, stderr, exitstatus = conn.execute('hwinfo --disk')
        for line in stdout:
            line = line.strip()
            if line.startswith('Size:'):
                # NOTE(review): value / 2 / 1024**2 -- presumably 512-byte
                # sectors converted to GiB; confirm against hwinfo output.
                machine_.disk_primary_size = int(
                    int(line.split()[1]) / 2 / 1024**2)
            elif line.startswith('Attached to:'):
                opening_bracket = line.find('(')
                closing_bracket = line.find(')')
                if opening_bracket > 0 and closing_bracket > 0:
                    machine_.disk_type = line[opening_bracket +
                                              1:closing_bracket]
                else:
                    machine_.disk_type = 'Unknown disk type'
                # Stop at the first 'Attached to:' line.
                break

        # lsmod
        logger.debug("Get 'lsmod'...")
        stdout, stderr, exitstatus = conn.execute('lsmod')
        machine_.lsmod = normalize_ascii("".join(stdout))

        # lspci
        logger.debug("Get 'lspci'...")
        stdout, stderr, exitstatus = conn.execute('lspci -vvv -nn')
        machine_.lspci = normalize_ascii("".join(stdout))

        # last
        logger.debug("Get 'last'...")
        output, stderr, exitstatus = conn.execute(
            'last | grep -v reboot | head -n 1')
        string = ''.join(output)
        # Keeps two fixed column ranges of the `last` line.
        # NOTE(review): presumably user name and login time -- confirm.
        result = string[0:8] + string[38:49]
        machine_.last = normalize_ascii("".join(result))

        # hwinfo
        logger.debug("Get 'hwinfo' (full)...")
        stdout, stderr, exitstatus = conn.execute(
            'hwinfo --bios ' +
            '--block --bridge --cdrom --cpu --disk --floppy --framebuffer ' +
            '--gfxcard --hub --ide --isapnp --isdn --keyboard --memory ' +
            '--monitor --mouse --netcard --network --partition --pci --pcmcia ' +
            '--scsi --smp --sound --sys --tape --tv --usb --usb-ctrl --wlan')
        machine_.hwinfo = normalize_ascii("".join(stdout))

        # dmidecode
        logger.debug("Get 'dmidecode'...")
        stdout, stderr, exitstatus = conn.execute('dmidecode')
        machine_.dmidecode = normalize_ascii("".join(stdout))

        # dmesg
        logger.debug("Get 'dmesg'...")
        # Uses /var/log/boot.msg when it exists, else the first 200 lines
        # of the journal.
        stdout, stderr, exitstatus = conn.execute(
            'if [ -e /var/log/boot.msg ]; then ' +
            'cat /var/log/boot.msg; else journalctl -xl | head -n200; ' +
            'fi')
        machine_.dmesg = normalize_ascii("".join(stdout))

        # lsscsi
        logger.debug("Get 'lsscsi'...")
        stdout, stderr, exitstatus = conn.execute('lsscsi -s')
        machine_.lsscsi = normalize_ascii("".join(stdout))

        # lsusb
        logger.debug("Get 'lsusb'...")
        stdout, stderr, exitstatus = conn.execute('lsusb')
        machine_.lsusb = normalize_ascii("".join(stdout))

        # IPMI
        logger.debug("Check for IPMI...")
        # IPMI is assumed present when the dmidecode output mentions it.
        machine_.ipmi = machine_.dmidecode.find('IPMI') >= 0

        # Firmware script
        logger.debug("Get BIOS version...")
        output, stderr, exitstatus = conn.execute_script_remote(
            'machine_get_firmware.sh')
        if output and output[0]:
            machine_.bios_version = output[0].strip()

        return machine_

    except Exception as e:
        logger.error("{} ({})".format(fqdn, e))
        return False
    finally:
        if conn:
            conn.close()
        if timer:
            timer.cancel()

    return None
def execute(self):
    """Write the Orthos message of the day to the machine and sync it.

    Does nothing (beyond logging) when the server setting
    'orthos.debug.motd.write' is false or the machine does not exist.
    Otherwise ``/etc/motd.orthos`` is written over SSH with the support
    contact, the optional comment, an administrative warning and the
    reservation details, and ``machine_sync_motd.sh`` is run remotely.

    Returns:
        False on ``SSH.Exception`` or ``IOError`` (both are logged), None
        otherwise.

    Raises:
        Exception: when ``machine_sync_motd.sh`` exits with a non-zero
            status.
    """
    if not ServerConfig.objects.bool_by_key('orthos.debug.motd.write'):
        logger.warning("Disabled: set 'orthos.debug.motd.write' to 'true'")
        return

    BEGIN = '-' * 69 + ' Orthos{ --'
    LINE = '-' * 80
    END = '-' * 69 + ' Orthos} --'

    try:
        machine = Machine.objects.get(fqdn=self.fqdn)
    except Machine.DoesNotExist:
        logger.error("Machine does not exist: fqdn={}".format(self.fqdn))
        return

    conn = None
    try:
        conn = SSH(machine.fqdn)
        conn.connect()
        motd = conn.get_file('/etc/motd.orthos', 'w')
        try:
            print(BEGIN, file=motd)
            print(
                "Machine of the ARCH team. Contact <{}> for problems.".format(
                    machine.get_support_contact()),
                file=motd)
            if machine.comment:
                print("INFO: " + machine.comment, file=motd)
            if machine.administrative:
                print(
                    "This machine is an administrative machine. DON\'T TOUCH!",
                    file=motd)
            if machine.reserved_by:
                print(LINE, file=motd)
                if machine.reserved_until != timezone.ZERO:
                    print("This machine is RESERVED by {} until {}.".format(
                        machine.reserved_by, machine.reserved_until),
                          file=motd)
                else:
                    print("This machine is RESERVED by {}.".format(
                        machine.reserved_by),
                          file=motd)
                print('', file=motd)
                print(wrap80(machine.reserved_reason), file=motd)
            print(END, file=motd)
        finally:
            # Close the remote file even when a write fails; it must be
            # closed before the sync script runs.
            motd.close()

        stdout, stderr, exitstatus = conn.execute_script_remote(
            'machine_sync_motd.sh')

        if exitstatus != 0:
            # No exception is being handled here, so log a plain error.
            logger.error("({}) {}".format(machine.fqdn, stderr))
            raise Exception(stderr)

    except SSH.Exception as e:
        logger.error("({}) {}".format(machine.fqdn, e))
        return False
    except IOError as e:
        logger.error("({}) {}".format(machine.fqdn, e))
        return False
    finally:
        if conn:
            conn.close()
def get_networkinterfaces(fqdn):
    """Collect the network interfaces of a machine over SSH.

    Parses ``hwinfo --network``: every non-indented line starts a new
    ``NetworkInterface`` and the indented ``Driver``, ``SysFS ID``,
    ``HW Address`` and ``Device File`` lines fill it in. Interfaces without
    a MAC address or driven by 'bridge'/'tun' are dropped. For the remaining
    ones the ``Port:`` line of ``ethtool`` is stored as ``ethernet_type``,
    unless the sysfs type equals ``ARPHRD_IEEE80211``. The connection is
    closed by a timer after five minutes if still open.

    Returns:
        A list of ``NetworkInterface`` objects, or False when the machine is
        unknown or any exception occurred.
    """
    try:
        machine = Machine.objects.get(fqdn=fqdn)
    except Machine.DoesNotExist:
        logger.warning("Machine '{}' does not exist".format(fqdn))
        return False

    # (pattern, attribute, store upper-cased) for the indented detail lines,
    # tried in this order; the first match wins.
    detail_patterns = (
        (r'\s+Driver: "(\w+)"', 'driver_module', False),
        (r'\s+SysFS ID: ([/\w.]+)', 'sysfs', False),
        (r'\s+HW Address: ([0-9a-fA-F:]+)', 'mac_address', True),
        (r'\s+Device File: ([\w.]+)', 'name', False),
    )

    def keep(candidate):
        # Has a MAC address and is neither a bridge nor a tun device.
        return (candidate and candidate.mac_address
                and candidate.driver_module not in ('bridge', 'tun'))

    conn = None
    timer = None
    try:
        conn = SSH(machine.fqdn)
        conn.connect()
        timer = threading.Timer(5 * 60, conn.close)
        timer.start()

        # Network interfaces
        logger.debug("Collect network interfaces...")
        stdout, stderr, exitstatus = conn.execute('hwinfo --network')

        interfaces = []
        interface = None

        for line in stdout:
            starts_section = len(line) > 0 and line[0] not in (' ', '\t')
            if starts_section:
                if keep(interface):
                    interfaces.append(interface)
                interface = NetworkInterface()
                continue

            for pattern, attribute, uppercase in detail_patterns:
                match = re.match(pattern, line)
                if not match:
                    continue
                value = match.group(1)
                setattr(interface, attribute,
                        value.upper() if uppercase else value)
                break

        # The last section is not followed by another header line.
        if keep(interface):
            interfaces.append(interface)

        for interface in interfaces:
            if interface.sysfs is None:
                continue

            type_path = '/sys/{}/type'.format(interface.sysfs)
            arp_type = ''.join(conn.read_file(type_path))

            if arp_type == ARPHRD_IEEE80211:
                continue

            stdout, stderr, exitstatus = conn.execute('ethtool %s' %
                                                      (interface.name))
            for line in stdout:
                port = re.match(r'\s+Port: (.+)', line)
                if port:
                    interface.ethernet_type = port.group(1)

        return interfaces

    except Exception as e:
        logger.error("{} ({})".format(fqdn, e))
        return False
    finally:
        if conn:
            conn.close()
        if timer:
            timer.cancel()
def get_status_ip(fqdn):
    """
    Retrieves information of the systems IPv4/IPv6 status.

    Runs `/sbin/ip a` on the machine via SSH, collects MAC address, IPv4 and
    IPv6 addresses per device, and compares them with the addresses stored
    for the machine. The result is written to `status_ipv4`/`status_ipv6` of
    a fresh, unsaved `Machine` instance.

    The SSH connection is closed by a timer after five minutes so a hanging
    remote command cannot block forever.

    Returns:
        An unsaved `Machine` carrying the detected status values, or False
        if the machine does not exist or any error occurred.
    """
    try:
        machine = Machine.objects.get(fqdn=fqdn)
    except Machine.DoesNotExist:
        logger.warning("Machine '{}' does not exist".format(fqdn))
        return False

    machine_ = Machine()

    conn = None
    timer = None
    try:
        conn = SSH(machine.fqdn)
        conn.connect()
        timer = threading.Timer(5 * 60, conn.close)
        timer.start()

        logger.debug("Check IPv4/IPv6 status...")
        stdout, stderr, exitstatus = conn.execute('/sbin/ip a')

        devices = {}
        current_device = None
        # Flat lists of every address seen on any considered device.
        addresses = {'inet': [], 'inet6': []}

        for line in stdout:
            match = re.match(r'^\d+:\s+([a-zA-Z0-9]+):\s+<.*>\s(.*)\n', line)
            if match:
                current_device = match.group(1)
                devices[current_device] = {
                    'mac_address': None,
                    'inet': None,
                    'inet6': None,
                    'flags': None
                }
                devices[current_device]['flags'] = match.group(2).split()
                continue

            line = line.lstrip()

            match = re.match(r'inet ([0-9.]{7,15})\/.*scope', line)
            if match:
                if devices[current_device]['inet'] is None:
                    devices[current_device]['inet'] = []
                devices[current_device]['inet'].append(match.group(1))
                continue

            match = re.match(r'inet6 ([a-f0-9:]*)\/[0-9]+ scope', line)
            if match:
                if devices[current_device]['inet6'] is None:
                    devices[current_device]['inet6'] = []
                devices[current_device]['inet6'].append(match.group(1))
                continue

            match = re.match('link/ether ([a-f0-9:]{17}) brd', line)
            if match:
                devices[current_device]['mac_address'] = match.group(1).upper()

        for device, values in devices.items():
            if values['mac_address'] is None:
                continue

            # ignore device if hooking up another
            if any(name in values['flags'] for name in devices):
                continue

            if values['mac_address'] == machine.mac_address:
                if values['inet'] is None:
                    machine_.status_ipv4 = Machine.StatusIP.AF_DISABLED
                elif machine.ipv4 not in values['inet']:
                    machine_.status_ipv4 = Machine.StatusIP.NO_ADDRESS
                    if [
                        ipv4 for ipv4 in values['inet']
                        if not ipv4.startswith('127.0.0.1')
                    ]:
                        machine_.status_ipv4 = Machine.StatusIP.ADDRESS_MISMATCH
                elif machine.ipv4 in values['inet']:
                    machine_.status_ipv4 = Machine.StatusIP.CONFIRMED
                else:
                    machine_.status_ipv4 = Machine.StatusIP.MISSING

                if values['inet6'] is None:
                    machine_.status_ipv6 = Machine.StatusIP.AF_DISABLED
                elif machine.ipv6 not in values['inet6']:
                    machine_.status_ipv6 = Machine.StatusIP.NO_ADDRESS
                    if [
                        ipv6 for ipv6 in values['inet6']
                        if not ipv6.startswith('fe80::')
                    ]:
                        machine_.status_ipv6 = Machine.StatusIP.ADDRESS_MISMATCH
                elif machine.ipv6 in values['inet6']:
                    machine_.status_ipv6 = Machine.StatusIP.CONFIRMED

            # `extend`, not `append`: the membership tests below compare a
            # single address string against this list, which never matches
            # when the list holds per-device lists (or None).
            addresses['inet'].extend(values['inet'] or [])
            addresses['inet6'].extend(values['inet6'] or [])

        # The expected address exists, but not on the device with our MAC.
        if machine_.status_ipv4 == Machine.StatusIP.NO_ADDRESS:
            if machine.ipv4 in addresses['inet']:
                machine_.status_ipv4 = Machine.StatusIP.MAC_MISMATCH

        if machine_.status_ipv6 == Machine.StatusIP.NO_ADDRESS:
            if machine.ipv6 in addresses['inet6']:
                machine_.status_ipv6 = Machine.StatusIP.MAC_MISMATCH

        return machine_

    except Exception as e:
        logger.error("{} ({})".format(fqdn, e))
        return False
    finally:
        if conn:
            conn.close()
        if timer:
            timer.cancel()
def get_pci_devices(fqdn):
    """
    Retrieves all PCI devices.

    Runs `lspci -mmvn` on the machine via SSH and turns every record into a
    `PCIDevice` (via `PCIDevice.from_lspci_mmnv`). Driver module names are
    then taken from the machine's stored `hwinfo` text and attached to the
    device with the matching bus ID.

    The SSH connection is closed by a timer after five minutes so a hanging
    remote command cannot block forever.

    Returns:
        A list of (unsaved) `PCIDevice` objects with `machine` set, or False
        if the machine does not exist or any error occurred.
    """

    def get_pci_device_by_slot(pci_devices, slot):
        """
        Returns the PCI device by slot.

        Slots shorter than eight characters are assumed to lack the PCI
        domain and get the default domain '0000:' prepended before the
        comparison.
        """
        # pci domain hacks
        if len(slot) < 8:
            slot = '0000:' + slot

        for dev in pci_devices:
            pci_slot = dev.slot
            if not pci_slot:
                continue
            pci_slot = pci_slot.strip()
            if len(pci_slot) < 8:
                pci_slot = '0000:' + pci_slot
            if pci_slot == slot:
                return dev
        return None

    from data.models import PCIDevice

    try:
        machine = Machine.objects.get(fqdn=fqdn)
    except Machine.DoesNotExist:
        logger.warning("Machine '{}' does not exist".format(fqdn))
        return False

    conn = None
    timer = None
    try:
        conn = SSH(fqdn)
        conn.connect()
        timer = threading.Timer(5 * 60, conn.close)
        timer.start()

        logger.debug("Collect PCI devices for '{}'...".format(machine.fqdn))
        pci_devices = []
        chunk = ''
        stdout, stderr, exitstatus = conn.execute('lspci -mmvn')
        for line in stdout:
            if len(line.strip()) == 0:
                # A blank line ends a record; ignore repeated blank lines.
                if chunk:
                    pci_devices.append(PCIDevice.from_lspci_mmnv(chunk))
                chunk = ''
            else:
                chunk += line
        if chunk:
            # Output did not end with a blank line: keep the last record.
            pci_devices.append(PCIDevice.from_lspci_mmnv(chunk))

        # drivers for PCI devices from hwinfo
        in_pci_device = False
        current_busid = None

        if machine.hwinfo:
            for line in machine.hwinfo.splitlines():
                if re.match(r'^\d+: PCI', line):
                    in_pci_device = True
                    continue
                if len(line.strip()) == 0:
                    in_pci_device = False
                    current_busid = None
                    continue
                if not in_pci_device:
                    continue

                # `\s+` accepts any indentation depth of the property lines.
                match = re.match(r'\s+SysFS BusID: ([0-9a-fA-F.:]+)', line)
                if match:
                    current_busid = match.group(1)

                match = re.match(r'\s+Driver: "([^"]*)"', line)
                if match and current_busid:
                    pcidev = get_pci_device_by_slot(pci_devices, current_busid)
                    if pcidev:
                        pcidev.drivermodule = match.group(1)

                match = re.match(r'\s+Driver Modules: "([^"]*)"', line)
                if match and current_busid:
                    pcidev = get_pci_device_by_slot(pci_devices, current_busid)
                    if pcidev:
                        pcidev.drivermodule = match.group(1)

        for pci_device in pci_devices:
            pci_device.machine = machine

        logger.debug("Collected {} PCI devices for '{}'".format(
            len(pci_devices), machine.fqdn))

        return pci_devices

    except Exception as e:
        logger.exception("{} ({})".format(fqdn, e))
        return False
    finally:
        if conn:
            conn.close()
        if timer:
            timer.cancel()
def _perform(self, action):
    """
    Common implementation for on, off and reset.

    Executes the matching `virsh -c lxc:///` command on the hypervisor of
    `self.machine` via SSH.

    Args:
        action: 'status', 'on' or 'off'. Any other value only logs a warning.

    Returns:
        For 'status': the `self.Status` value of the domain (0 for an
        unknown state). For 'on'/'off': True on success. For an unknown
        action: False.

    Raises:
        Exception: if the machine has no hypervisor FQDN, if the virsh
            command fails, or if the domain is not listed by `virsh list`.
    """
    name = self.machine.hostname
    virsh = 'virsh -c lxc:///'
    result = False

    if not self.machine.hypervisor.fqdn:
        logger.error("No hypervisor system found")
        raise Exception("No hypervisor found")

    conn = SSH(self.machine.hypervisor.fqdn)
    try:
        conn.connect()

        if action == 'status':
            stdout, stderr, exitstatus = conn.execute(
                '{} list --all'.format(virsh))

            if exitstatus != 0:
                logger.error(''.join(stderr))
                raise Exception(''.join(stderr))

            # Skip the two header lines of the `virsh list` table.
            for line in stdout[2:]:
                columns = line.strip().split()
                if len(columns) < 3:
                    # Blank or truncated row (virsh ends its table with an
                    # empty line); it cannot describe a domain.
                    continue
                if columns[1] == name:
                    return {
                        'running': self.Status.ON,
                        'shut': self.Status.OFF,
                        'paused': self.Status.PAUSED
                    }.get(columns[2], 0)

            raise Exception("Couldn't find domain '{}'!".format(name))

        elif action == 'off':
            stdout, stderr, exitstatus = conn.execute('{} destroy {}'.format(
                virsh, name))

            if exitstatus == 0:
                logger.debug("Virtual machine '{}' stopped".format(name))
                result = True
            else:
                logger.error(''.join(stderr))
                raise Exception(''.join(stderr))

        elif action == 'on':
            stdout, stderr, exitstatus = conn.execute('{} start {}'.format(
                virsh, name))

            if exitstatus == 0:
                logger.debug("Virtual machine '{}' started".format(name))
                result = True
            else:
                logger.error(''.join(stderr))
                raise Exception(''.join(stderr))

        else:
            logger.warning("Action '{}' does not exist".format(action))
            result = False
    finally:
        # Release the connection on every path, including early returns and
        # raised exceptions.
        conn.close()

    return result
def execute(self):
    """
    Rewrite the Orthos-managed section of `/etc/cscreenrc` on the serial
    console server and let cscreen reload it.

    Does nothing when the server configuration key
    'orthos.debug.serialconsole.write' is not true or when no machine with
    `self.fqdn` exists.

    Steps performed on the remote host:
        - create the lock file `/etc/cscreenrc_allow_update`
        - create `/etc/cscreenrc` with empty Orthos markers if it is missing
        - copy the current file to `/etc/cscreenrc.old`
        - replace everything between the Orthos begin/end marker lines with
          the records of all serial consoles of this server
        - run `cscreen -u` and remove the lock file

    SSH and I/O errors are logged and swallowed.

    Raises:
        Exception: if locking, creating the file or unlocking fails.
    """
    from data.models import Machine, SerialConsole

    if not ServerConfig.objects.bool_by_key(
            'orthos.debug.serialconsole.write'):
        logger.warning(
            "Disabled: set 'orthos.debug.serialconsole.write' to 'true'")
        return

    try:
        cscreen_server = Machine.objects.get(fqdn=self.fqdn)
    except Machine.DoesNotExist:
        logger.warning("Serial console server does not exist: {}".format(
            self.fqdn))
        # Without a server there is nothing to connect to.
        return

    conn = None
    try:
        conn = SSH(cscreen_server.fqdn)
        conn.connect()

        stdout, stderr, exitstatus = conn.execute(
            'sudo touch /etc/cscreenrc_allow_update')
        if exitstatus != 0:
            raise Exception(
                "Couldn't lock cscreen ('touch /etc/cscreenrc_allow_update')"
            )

        new_content = ''
        for serialconsole in SerialConsole.cscreen.get(cscreen_server):
            new_content += serialconsole.get_comment_record() + '\n'
            new_content += serialconsole.get_command_record() + '\n'

        screenrc_file = '/etc/cscreenrc'

        # create `/etc/cscreenrc` if it doesn't exist
        # (the shell's `[` needs `]` as a separate, space-delimited word)
        stdout, stderr, exitstatus = conn.execute(
            '[ -e "{}" ]'.format(screenrc_file))

        orthos_inline_begin = ServerConfig.objects.by_key(
            'orthos.configuration.inline.begin')
        orthos_inline_end = ServerConfig.objects.by_key(
            'orthos.configuration.inline.end')

        if exitstatus != 0:
            # Write both markers into the file (begin, end -> path).
            stdout, stderr, exitstatus = conn.execute(
                'echo "{}\n{}" > {}'.format(orthos_inline_begin,
                                            orthos_inline_end,
                                            screenrc_file))

            if exitstatus != 0:
                raise Exception("Couldn't create CScreen file ('{}')".format(
                    screenrc_file))

        # Save backup file which is used later by an invoked script
        # to determine the changes and update the running screen
        # session (add, remove or restart modified entries).
        stdout, stderr, exitstatus = conn.execute(
            'sudo cp {} {}.old'.format(screenrc_file, screenrc_file))

        cscreen = conn.get_file(screenrc_file, 'r')
        buffer = ''

        # Copy everything outside the marker pair unchanged; lines between
        # the markers are dropped and replaced by `new_content`.
        in_replace = False
        for line in cscreen.readlines():
            if not in_replace and line.startswith(orthos_inline_begin):
                buffer += line + new_content
                in_replace = True
            elif in_replace and line.startswith(orthos_inline_end):
                buffer += line
                in_replace = False
            elif not in_replace:
                buffer += line

        cscreen.close()

        cscreen = conn.get_file(screenrc_file, 'w')
        buffer = buffer.strip('\n')
        print(buffer, file=cscreen)
        cscreen.close()

        stdout, stderr, exitstatus = conn.execute(
            'sudo /usr/bin/cscreen -u')
        logger.info("CScreen update exited with: {}".format(exitstatus))

        stdout, stderr, exitstatus = conn.execute(
            'sudo rm -f /etc/cscreenrc_allow_update')
        if exitstatus != 0:
            raise Exception(
                "Couldn't unlock CScreen ('rm /etc/cscreenrc_allow_update')"
            )

    except SSH.Exception as exception:
        logger.error(exception)
    except IOError as exception:
        logger.error(exception)
    finally:
        if conn:
            conn.close()
class CobblerServer:
    """
    Thin SSH wrapper around the `cobbler` command line tool of one server.

    The path of the cobbler executable is read from the server configuration
    key "cobbler.command".
    """

    def __init__(self, fqdn, domain):
        """
        Args:
            fqdn: FQDN of the Cobbler server to connect to.
            domain: domain object whose machines get deployed; only its
                `pk` is used here.
        """
        self._fqdn = fqdn
        self._conn = None
        self._domain = domain
        self._cobbler_path = ServerConfig.objects.by_key("cobbler.command")

    def connect(self):
        """
        Connect to the Cobbler server via SSH (no-op if already connected).
        """
        if not self._conn:
            conn = SSH(self._fqdn)
            conn.connect()
            # Only remember connections that were actually established.
            self._conn = conn

    def close(self):
        """
        Close the connection to the Cobbler server (no-op if not connected).
        """
        if self._conn:
            self._conn.close()
            # Forget the closed connection so `connect()` opens a new one.
            self._conn = None

    def deploy(self):
        """
        Add or update every active machine of the domain on the Cobbler server.

        Machines already known to Cobbler are updated, all others are added.
        A failing command is logged and does not stop the deployment. The
        connection is closed afterwards, also on errors.

        Raises:
            SystemError: if the cobbler executable is not found on the server.
            CobblerException: if listing the existing systems fails.
        """
        self.connect()
        try:
            if not self.is_installed():
                raise SystemError("No Cobbler service found: {}".format(self._fqdn))

            machines = Machine.active_machines.filter(fqdn_domain=self._domain.pk)
            cobbler_machines = self.get_machines()

            cobbler_commands = []
            for machine in machines:
                if machine.fqdn in cobbler_machines:
                    cobbler_commands.append(
                        get_cobbler_update_command(machine, self._cobbler_path))
                else:
                    cobbler_commands.append(
                        get_cobbler_add_command(machine, self._cobbler_path))

            for command in cobbler_commands:
                # TODO: Convert this to a single ssh call (performance)
                _, _, exitcode = self._conn.execute(command)
                if exitcode:
                    logger.error("failed to execute %s on %s", command, self._fqdn)
        finally:
            self.close()

    def is_installed(self):
        """
        Check if the cobbler executable exists and is executable on the server.

        Requires an established connection.
        """
        return bool(self._conn.check_path(self._cobbler_path, '-x'))

    def is_running(self):
        """
        Check if the Cobbler daemon is running via the cobbler version command.

        Requires an established connection.
        """
        command = f"{self._cobbler_path} version"
        _, _, exitstatus = self._conn.execute(command)
        return exitstatus == 0

    def get_machines(self):
        """
        Return the names printed by `cobbler system list`, whitespace-stripped.

        Requires an established connection.

        Raises:
            CobblerException: if the command exits with a non-zero status.
        """
        stdout, stderr, exitstatus = self._conn.execute(
            "{cobbler} system list".format(cobbler=self._cobbler_path))
        if exitstatus:
            logger.warning("system list failed on %s with %s", self._fqdn, stderr)
            raise CobblerException("system list failed on {server}".format(server=self._fqdn))
        return [system.strip(' \n\t') for system in stdout]
class Libvirt(VirtualizationAPI):
    """
    Virtualization API implementation which manages libvirt (QEMU) domains by
    running `virsh`, `virt-install` and helper commands on the host via SSH.
    """

    class Meta:
        proxy = True

    VIRSH = 'virsh -c qemu:///system'
    # stderr fragments which do not count as failure for destroy/undefine
    IGNORE_STDERR = [
        'domain is not running',
        'no domain with matching name'
    ]
    QEMU_IMG_CONVERT = '/usr/bin/qemu-img convert -O qcow2 -o preallocation=metadata {0}.tmp {0}'

    def __init__(self):
        self.conn = None

    def get_image_list(self):
        """
        Returns the available architectures and the full image list (over all
        available architectures).

        The images are read from the local image directory configured under
        'virtualization.libvirt.images.directory'. A missing directory is
        logged and results in a (possibly partial) image list.

        Return format:

            (
                ['<arch1>', '<arch2>', ...],
                [('<value>', '<option>'), ...]
            )
        """
        from data.models import ServerConfig

        architectures = [self.host.architecture.name]
        image_directory = ServerConfig.objects.by_key('virtualization.libvirt.images.directory')
        image_list = []

        try:
            for architecture in architectures:
                directory = '{}/{}/'.format(image_directory.rstrip('/'), architecture)
                for image in os.listdir(directory):
                    path = directory + image
                    size = os.path.getsize(path)
                    mtime = str(date.fromtimestamp(os.path.getmtime(path)))

                    # Show sizes below 1 GiB in MiB, everything else in GiB.
                    if size < (1024**3):
                        size = '{}M'.format(int(size / (1024**2)))
                    else:
                        size = '{}G'.format(int(size / (1024**3)))

                    pretty_image = image.split('.')[0]
                    image_list.append((image, '{} ({} {})'.format(pretty_image, mtime, size)))

        except FileNotFoundError as e:
            logger.exception(e)

        return (architectures, image_list)

    def connect(function):
        """
        Create SSH connection if needed.

        Decorator for methods of this class: opens the SSH connection to
        `self.host.fqdn` on first use and stores it in `self.conn`.
        """
        from functools import wraps

        @wraps(function)
        def decorator(self, *args, **kwargs):
            from utils.ssh import SSH

            if not self.conn:
                self.conn = SSH(self.host.fqdn)
                self.conn.connect()
            return function(self, *args, **kwargs)

        return decorator

    @connect
    def _execute(self, command):
        """
        Run `command` on the host and return `(stdout, stderr, exitstatus)`.
        """
        return self.conn.execute(command)

    def check_connection(self):
        """
        Check libvirt connection (running libvirt).
        """
        stdout, stderr, exitstatus = self._execute('{} version'.format(self.VIRSH))
        return exitstatus == 0

    def get_list(self, parameters='--all'):
        """
        Return `virsh list` output.

        Raises:
            Exception: with the command's stderr if `virsh list` fails.
        """
        stdout, stderr, exitstatus = self._execute('{} list {}'.format(self.VIRSH, parameters))

        if exitstatus != 0:
            raise Exception(''.join(stderr))

        return ''.join(stdout)

    def check_network_bridge(self, bridge='br0'):
        """
        Execute `create_bridge.sh` script remotely and try to set up bridge if it doesn't exist.

        Returns true if the bridge is available, false otherwise.

        Uses `self.conn` directly, i.e. the connection must already exist.

        Raises:
            Exception: with the command's stderr if the script or `brctl show`
                fails.
        """
        stdout, stderr, exitstatus = self.conn.execute_script_remote('create_bridge.sh')

        if exitstatus != 0:
            raise Exception(''.join(stderr))

        stdout, stderr, exitstatus = self.conn.execute('brctl show')

        if exitstatus != 0:
            raise Exception(''.join(stderr))

        for line in stdout:
            if line.startswith(bridge):
                return True

        return False

    def generate_hostname(self):
        """
        Generate domain name (hostname).

        Check hostnames against Orthos machines and libvirt `virsh list`. The
        first free name of the form '<host hostname>-<n>' with n in
        1..`vm_max` is returned.

        Raises:
            Exception: if all candidate names are occupied.
        """
        hostname = None
        occupied_hostnames = set(vm.hostname for vm in self.host.get_virtual_machines())

        libvirt_list = self.get_list()
        # Skip the two header lines of the `virsh list` table.
        for line in libvirt_list.split('\n')[2:]:
            columns = line.strip().split()
            # The name is the second column; ignore blank/short rows.
            if len(columns) > 1:
                occupied_hostnames.add(columns[1])

        for i in range(1, self.host.vm_max + 1):
            hostname_ = '{}-{}'.format(self.host.hostname, i)
            if hostname_ not in occupied_hostnames:
                hostname = hostname_
                break

        if hostname is None:
            raise Exception("All hostnames (domain names) busy!")

        return hostname

    def generate_networkinterfaces(self, amount=1, bridge='br0', model='virtio'):
        """
        Generate networkinterfaces.

        Returns `amount` unsaved `NetworkInterface` objects, each with a
        random MAC address not yet stored in the database.
        """
        from data.models import NetworkInterface

        networkinterfaces = []
        for _ in range(amount):
            mac_address = get_random_mac_address()
            while NetworkInterface.objects.filter(mac_address=mac_address).count() != 0:
                mac_address = get_random_mac_address()

            networkinterface = NetworkInterface(mac_address=mac_address)
            networkinterface.bridge = bridge
            networkinterface.model = model

            networkinterfaces.append(networkinterface)

        return networkinterfaces

    def copy_image(self, image, disk_image):
        """
        Copy and allocate disk image.

        Copies `image` to '<disk_image>.tmp', converts it to qcow2 at
        `disk_image` and removes the temporary copy. Returns False as soon as
        one of the commands fails, True otherwise.
        """
        commands = (
            'cp {} {}.tmp'.format(image, disk_image),
            self.QEMU_IMG_CONVERT.format(disk_image),
            'rm -rf {}.tmp'.format(disk_image),
        )
        for command in commands:
            stdout, stderr, exitstatus = self.conn.execute(command)
            if exitstatus != 0:
                return False

        return True

    def delete_disk_image(self, disk_image):
        """
        Delete the old disk image.
        """
        stdout, stderr, exitstatus = self.conn.execute('rm -rf {}'.format(disk_image))
        return exitstatus == 0

    def calculate_vcpu(self):
        """
        Return virtual CPU amount.

        Two host cores are left out and the rest is divided evenly among
        `vm_max` VMs; the result is never smaller than 1. Without a known
        host core count, 1 is returned.
        """
        host_cpu_cores = self.host.cpu_cores

        if host_cpu_cores is None:
            return 1

        # Clamp to 1: hosts with two or fewer cores would otherwise yield
        # zero or a negative vCPU count.
        return max(1, int((host_cpu_cores - 2) / self.host.vm_max))

    def check_memory(self, memory_amount):
        """
        Check if memory amount for VM is available on host.

        Reserve 2GB of memory for host system.

        Raises:
            Exception: if the host's memory size is unknown or too small.
        """
        host_ram_amount = self.host.ram_amount
        host_reserved_ram_amount = 2048

        if not host_ram_amount:
            raise Exception("Can't detect memory size of host system '{}'".format(self.host))

        if memory_amount > (host_ram_amount - host_reserved_ram_amount):
            # Report what the host has, not what was requested.
            raise Exception("Host system has only {}MB of memory!".format(host_ram_amount))

        return True

    def execute_virt_install(self, *args, dry_run=True, **kwargs):
        """
        Run `virt-install` command.

        Expected keyword arguments: `hostname`, `vcpu`, `memory`, `boot` and
        `disk` (dict with 'image', 'size', 'format', 'sparse', 'bus');
        optional: `networkinterfaces`, `vnc` (dict with 'enabled', 'port') and
        `parameters` (extra command line text).

        Raises:
            Exception: with the command's stderr if `virt-install` fails.
        """
        command = '/usr/bin/virt-install '
        command += '--name {hostname} '
        command += '--vcpus {vcpu} '
        command += '--memory {memory} '

        disk_ = '--disk {},'.format(kwargs['disk']['image'])
        disk_ += 'size={},'.format(kwargs['disk']['size'])
        disk_ += 'format={},'.format(kwargs['disk']['format'])
        disk_ += 'sparse={},'.format(kwargs['disk']['sparse'])
        disk_ += 'bus={} '.format(kwargs['disk']['bus'])
        command += disk_

        for networkinterface in kwargs.get('networkinterfaces', []):
            networkinterface_ = '--network model={},'.format(networkinterface.model)
            networkinterface_ += 'bridge={},'.format(networkinterface.bridge)
            networkinterface_ += 'mac={} '.format(networkinterface.mac_address)
            command += networkinterface_

        command += '{boot} '

        vnc = kwargs.get('vnc', None)
        if vnc and vnc['enabled']:
            command += '--graphics vnc,listen=0.0.0.0,port={} '.format(vnc['port'])

        command += kwargs.get('parameters', '')

        if dry_run:
            # Leading space: `parameters` need not end with a separator.
            command += ' --dry-run'

        # Fill the remaining placeholders ({hostname}, {vcpu}, {memory}, {boot}).
        command = command.format(**kwargs)
        logger.debug(command)
        stdout, stderr, exitstatus = self.conn.execute(command)

        if exitstatus != 0:
            raise Exception(''.join(stderr))

        return True

    def _create(self, vm, *args, **kwargs):
        """
        Wrapper function for creating a VM.

        Steps:
            - check connection to host
            - check maximum VM number limit
            - check network bridge
            - check image source directory (if needed)
            - check Open Virtual Machine Firmware (OVMF) binary (if needed)
            - check memory size
            - generate hostname (=domain name)
            - copy image to disk image (if needed)
            - run `virt-install`

        Keyword arguments read: `architecture`, `image`, `uefi_boot`,
        `ram_amount`, `vnc`, `disk_size`, `networkinterfaces`, `parameters`.

        On success `vm` carries hostname, FQDN, VNC settings, CPU/RAM amount
        and the generated interfaces in `vm.unsaved_networkinterfaces`.

        Raises:
            Exception: if one of the checks or commands fails.
        """
        from data.models import ServerConfig

        bridge = ServerConfig.objects.by_key('virtualization.libvirt.bridge')
        image_directory = ServerConfig.objects.by_key('virtualization.libvirt.images.directory')
        disk_image_directory = '/abuild/orthos-vm-images/'
        # Template; the hostname placeholder is filled once it is generated.
        disk_image = '{}/{}.qcow2'.format(disk_image_directory.rstrip('/'), '{}')
        ovmf = ServerConfig.objects.by_key('virtualization.libvirt.ovmf.path')
        image_directory = '{}/{}/'.format(
            image_directory.rstrip('/'),
            kwargs['architecture']
        )

        if not self.check_connection():
            raise Exception("Host system not reachable!")

        if self.host.get_virtual_machines().count() >= self.host.vm_max:
            raise Exception("Maximum number of VMs reached!")

        if not self.check_network_bridge(bridge=bridge):
            raise Exception("Network bridge setup failed!")

        if kwargs['image'] is not None:
            if not self.conn.check_path(image_directory, '-e'):
                raise Exception("Image source directory missing on host system!")

        if not self.conn.check_path(disk_image_directory, '-w'):
            raise Exception(
                "Image disk directory missing on host system: {}!".format(disk_image_directory)
            )

        if kwargs['uefi_boot']:
            if not self.conn.check_path(ovmf, '-e'):
                raise Exception("OVMF file not found: '{}'!".format(ovmf))
            boot = '--boot loader={},network'.format(ovmf)
        else:
            boot = '--boot network,hd,menu=off'

        self.check_memory(kwargs['ram_amount'])

        vm.hostname = self.generate_hostname()
        vm.fqdn = '{}.{}'.format(vm.hostname, self.host.fqdn_domain.name)

        # The VM index is the part after the LAST hyphen; the host's own
        # hostname may contain hyphens as well.
        vnc_port = 5900 + int(vm.hostname.rsplit('-', 1)[1])
        vm.vnc = {
            'enabled': kwargs['vnc'],
            'port': vnc_port
        }

        vm.cpu_cores = self.calculate_vcpu()

        vm.ram_amount = kwargs['ram_amount']

        disk_image = disk_image.format(vm.hostname)

        if kwargs['image'] is not None:
            image = '{}/{}'.format(image_directory.rstrip('/'), kwargs['image'])

            if not self.copy_image(image, disk_image):
                raise Exception("Couldn't copy image: {} > {}!".format(image, disk_image))
        else:
            self.delete_disk_image(disk_image)

        disk = {
            'image': disk_image,
            'size': kwargs['disk_size'],
            'format': 'qcow2',
            'sparse': True,
            'bus': 'virtio'
        }

        networkinterfaces = self.generate_networkinterfaces(
            amount=kwargs['networkinterfaces'],
            bridge=bridge
        )

        parameters = '--events on_reboot=restart,on_poweroff=destroy '
        parameters += '--import '
        parameters += '--noautoconsole '
        parameters += '--autostart '
        parameters += kwargs['parameters']

        self.execute_virt_install(
            hostname=vm.hostname,
            vcpu=vm.cpu_cores,
            memory=vm.ram_amount,
            disk=disk,
            networkinterfaces=networkinterfaces,
            boot=boot,
            vnc=vm.vnc,
            parameters=parameters,
            dry_run=False
        )

        vm.unsaved_networkinterfaces = list(networkinterfaces)

        return True

    def _remove(self, vm):
        """
        Wrapper function for removing a VM (destroy domain > undefine domain).

        Raises:
            Exception: if the host is not reachable or a virsh command fails.
        """
        if not self.check_connection():
            raise Exception("Host system not reachable!")

        self.destroy(vm)
        self.undefine(vm)

        return True

    def _virsh_tolerant(self, subcommand, vm):
        """
        Run `virsh <subcommand> <vm.hostname>`; a failure only raises when its
        stderr contains none of the `IGNORE_STDERR` fragments.
        """
        stdout, stderr, exitstatus = self._execute(
            '{} {} {}'.format(self.VIRSH, subcommand, vm.hostname)
        )
        if exitstatus != 0:
            stderr = ''.join(stderr)

            if not any(line in stderr for line in self.IGNORE_STDERR):
                raise Exception(stderr)

        return True

    def destroy(self, vm):
        """
        Destroy VM on host system. Ignore `domain is not running` error and proceed.
        """
        return self._virsh_tolerant('destroy', vm)

    def undefine(self, vm):
        """
        Undefine VM on host system. Errors listed in `IGNORE_STDERR` are ignored.
        """
        return self._virsh_tolerant('undefine', vm)