def write_rdma_config_to_device(path, ipv4_addr, mac_addr): data = RDMADeviceHandler.generate_rdma_config(ipv4_addr, mac_addr) logger.info( "RDMA: Updating device with configuration: {0}".format(data)) with open(path, "w") as f: f.write(data) logger.info("RDMA: Updated device with IPv4/MAC addr successfully")
def _purge_agents(self): """ Remove from disk all directories and .zip files of unknown agents (without removing the current, running agent). """ path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) known_versions = [agent.version for agent in self.agents] if not is_current_agent_installed() and CURRENT_VERSION not in known_versions: logger.warn( u"Running Agent {0} was not found in the agent manifest - adding to list", CURRENT_VERSION) known_versions.append(CURRENT_VERSION) for agent_path in glob.iglob(path): try: name = fileutil.trim_ext(agent_path, "zip") m = AGENT_DIR_PATTERN.match(name) if m is not None and FlexibleVersion(m.group(1)) not in known_versions: if os.path.isfile(agent_path): logger.info(u"Purging outdated Agent file {0}", agent_path) os.remove(agent_path) else: logger.info(u"Purging outdated Agent directory {0}", agent_path) shutil.rmtree(agent_path) except Exception as e: logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e)) return
def forward_signal(self, signum, frame): # Note: # - At present, the handler is registered only for SIGTERM. # However, clean shutdown is both SIGTERM and SIGKILL. # A SIGKILL handler is not being registered at this time to # minimize perturbing the code. if signum in (signal.SIGTERM, signal.SIGKILL): self._shutdown() if self.child_process is None: return logger.info( u"Agent {0} forwarding signal {1} to {2}", CURRENT_AGENT, signum, self.child_agent.name if self.child_agent is not None else CURRENT_AGENT) self.child_process.send_signal(signum) if self.signal_handler not in (None, signal.SIG_IGN, signal.SIG_DFL): self.signal_handler(signum, frame) elif self.signal_handler is signal.SIG_DFL: if signum == signal.SIGTERM: # TODO: This should set self.running to False vs. just exiting sys.exit(0) return
def is_loopback(self, ifname): s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP) result = fcntl.ioctl(s.fileno(), 0x8913, struct.pack('256s', ifname[:15])) flags, = struct.unpack('H', result[16:18]) isloopback = flags & 8 == 8 logger.info('interface [{0}] has flags [{1}], is loopback [{2}]'.format(ifname, flags, isloopback)) return isloopback
def _ensure_no_orphans(self, orphan_wait_interval=ORPHAN_WAIT_INTERVAL): previous_pid_file, pid_file = self._write_pid_file() if previous_pid_file is not None: try: pid = fileutil.read_file(previous_pid_file) wait_interval = orphan_wait_interval while self.osutil.check_pid_alive(pid): wait_interval -= GOAL_STATE_INTERVAL if wait_interval <= 0: logger.warn( u"{0} forcibly terminated orphan process {1}", CURRENT_AGENT, pid) os.kill(pid, signal.SIGKILL) break logger.info( u"{0} waiting for orphan process {1} to terminate", CURRENT_AGENT, pid) time.sleep(GOAL_STATE_INTERVAL) except Exception as e: logger.warn( u"Exception occurred waiting for orphan agent to terminate: {0}", ustr(e)) return
def get_protocol_by_file(self): """ Detect protocol by tag file. If a file "useMetadataEndpoint.tag" is found on provision iso, metedata protocol will be used. No need to probe for wire protocol :returns: protocol instance """ self.lock.acquire() try: if self.protocol is not None: return self.protocol try: self.protocol = self._get_protocol() return self.protocol except ProtocolNotFoundError: pass logger.info("Detect protocol by file") tag_file_path = os.path.join(conf.get_lib_dir(), TAG_FILE_NAME) protocols = [] if os.path.isfile(tag_file_path): protocols.append("MetadataProtocol") else: protocols.append("WireProtocol") self.protocol = self._detect_protocol(protocols) return self.protocol finally: self.lock.release()
def refresh_repos(self): logger.info("RDMA: refreshing yum repos") if shellutil.run('yum clean all') != 0: raise Exception('Cleaning yum repositories failed') if shellutil.run('yum updateinfo') != 0: raise Exception('Failed to act on yum repo update information') logger.info("RDMA: repositories refreshed")
def _detect_protocol(self, protocols): """ Probe protocol endpoints in turn. """ self.clear_protocol() for retry in range(0, MAX_RETRY): for protocol_name in protocols: try: protocol = self._detect_wire_protocol() \ if protocol_name == "WireProtocol" \ else self._detect_metadata_protocol() IOErrorCounter.set_protocol_endpoint( endpoint=protocol.endpoint) return protocol except ProtocolError as e: logger.info("Protocol endpoint not found: {0}, {1}", protocol_name, e) if retry < MAX_RETRY -1: logger.info("Retry detect protocols: retry={0}", retry) time.sleep(PROBE_INTERVAL) raise ProtocolNotFoundError("No protocol found.")
def get_protocol(self): """ Detect protocol by endpoints :returns: protocol instance """ self.lock.acquire() try: if self.protocol is not None: return self.protocol try: self.protocol = self._get_protocol() return self.protocol except ProtocolNotFoundError: pass logger.info("Detect protocol endpoints") protocols = ["WireProtocol", "MetadataProtocol"] self.protocol = self._detect_protocol(protocols) return self.protocol finally: self.lock.release()
def deploy_ssh_pubkey(self, username, pubkey): logger.info('deploy_ssh_pubkey') username = '******' path, thumbprint, value = pubkey path = self._replace_user(path, username) super(GaiaOSUtil, self).deploy_ssh_pubkey( username, (path, thumbprint, value))
def deploy_ssh_keypair(self, username, keypair): logger.info('deploy_ssh_keypair') username = '******' path, thumbprint = keypair path = self._replace_user(path, username) super(GaiaOSUtil, self).deploy_ssh_keypair( username, (path, thumbprint))
def wait_for_ovfenv(self, max_retry=1800, sleep_time=1): """ Wait for cloud-init to copy ovf-env.xml file from provision ISO """ ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME) for retry in range(0, max_retry): if os.path.isfile(ovf_file_path): try: ovf_env = OvfEnv(fileutil.read_file(ovf_file_path)) self.handle_provision_guest_agent(ovf_env.provision_guest_agent) return except ProtocolError as pe: raise ProvisionError("OVF xml could not be parsed " "[{0}]: {1}".format(ovf_file_path, ustr(pe))) else: if retry < max_retry - 1: logger.info( "Waiting for cloud-init to copy ovf-env.xml to {0} " "[{1} retries remaining, " "sleeping {2}s]".format(ovf_file_path, max_retry - retry, sleep_time)) if not self.validate_cloud_init(): logger.warn("cloud-init does not appear to be running") time.sleep(sleep_time) raise ProvisionError("Giving up, ovf-env.xml was not copied to {0} " "after {1}s".format(ovf_file_path, max_retry * sleep_time))
def enable_swap(self, mount_point): logger.info("Enable swap") try: size_mb = conf.get_resourcedisk_swap_size_mb() self.create_swap_space(mount_point, size_mb) except ResourceDiskError as e: logger.error("Failed to enable swap {0}", e)
def __init__(self, name, path_maker): """ Construct CGroups object. Create appropriately-named directory for each hierarchy of interest. :param str name: Name for the cgroup (usually the full name of the extension) :param path_maker: Function which constructs the root path for a given hierarchy where this cgroup lives """ if name == "": self.name = "Agents+Extensions" self.is_wrapper_cgroup = True else: self.name = name self.is_wrapper_cgroup = False self.cgroups = {} self.threshold = None if not self.enabled(): return system_hierarchies = os.listdir(BASE_CGROUPS) for hierarchy in CGroups._hierarchies: if hierarchy not in system_hierarchies: self.disable() raise CGroupsException("Hierarchy {0} is not mounted".format(hierarchy)) cgroup_name = "" if self.is_wrapper_cgroup else self.name cgroup_path = path_maker(hierarchy, cgroup_name) if not os.path.isdir(cgroup_path): logger.info("Creating cgroup directory {0}".format(cgroup_path)) CGroups._try_mkdir(cgroup_path) self.cgroups[hierarchy] = cgroup_path
def add_user(self, username, encrypted_password, account_expiration): try: expiration_date = (account_expiration + timedelta(days=1)).strftime(DATE_FORMAT) logger.verbose("Adding user {0} with expiration date {1}" .format(username, expiration_date)) self.os_util.useradd(username, expiration_date, REMOTE_ACCESS_ACCOUNT_COMMENT) except OSError as oe: logger.error("Error adding user {0}. {1}" .format(username, oe.strerror)) return except Exception as e: logger.error("Error adding user {0}. {1}".format(username, ustr(e))) return try: prv_key = os.path.join(conf.get_lib_dir(), TRANSPORT_PRIVATE_CERT) pwd = self.cryptUtil.decrypt_secret(encrypted_password, prv_key) self.os_util.chpasswd(username, pwd, conf.get_password_cryptid(), conf.get_password_crypt_salt_len()) self.os_util.conf_sudoer(username) logger.info("User '{0}' added successfully with expiration in {1}" .format(username, expiration_date)) return except OSError as oe: self.handle_failed_create(username, oe.strerror) except Exception as e: self.handle_failed_create(username, ustr(e))
def _write_pid_file(self): pid_files = self._get_pid_files() pid_dir, pid_name, pid_re = self._get_pid_parts() previous_pid_file = None \ if len(pid_files) <= 0 \ else pid_files[-1] pid_index = -1 \ if previous_pid_file is None \ else int(pid_re.match(os.path.basename(previous_pid_file)).group(1)) pid_file = os.path.join(pid_dir, "{0}_{1}".format(pid_index+1, pid_name)) try: fileutil.write_file(pid_file, ustr(os.getpid())) logger.info(u"{0} running as process {1}", CURRENT_AGENT, ustr(os.getpid())) except Exception as e: pid_file = None logger.warn( u"Expection writing goal state agent {0} pid to {1}: {2}", CURRENT_AGENT, pid_file, ustr(e)) return pid_files, pid_file
def update_tracked(ext_handlers): """ Track CGroups for all enabled extensions. Track CGroups for services created by enabled extensions. Stop tracking CGroups for not-enabled extensions. :param List(ExtHandler) ext_handlers: """ if not CGroups.enabled(): return not_enabled_extensions = set() for extension in ext_handlers: if extension.properties.state == u"enabled": CGroupsTelemetry.track_extension(extension.name) else: not_enabled_extensions.add(extension.name) names_now_tracked = set(CGroupsTelemetry._tracked.keys()) if CGroupsTelemetry.tracked_names != names_now_tracked: now_tracking = " ".join("[{0}]".format(name) for name in sorted(names_now_tracked)) if len(now_tracking): logger.info("After updating cgroup telemetry, tracking {0}".format(now_tracking)) else: logger.warn("After updating cgroup telemetry, tracking no cgroups.") CGroupsTelemetry.tracked_names = names_now_tracked
def reboot_system(self): """Reboot the system. This is required as the kernel module for the rdma driver cannot be unloaded with rmmod""" logger.info('RDMA: Rebooting system.') ret = shellutil.run('shutdown -r now') if ret != 0: logger.error('RDMA: Failed to reboot the system')
def setup_rdma_device(): logger.verbose("Parsing SharedConfig XML contents for RDMA details") xml_doc = parse_doc( fileutil.read_file(os.path.join(conf.get_lib_dir(), SHARED_CONF_FILE_NAME))) if xml_doc is None: logger.error("Could not parse SharedConfig XML document") return instance_elem = find(xml_doc, "Instance") if not instance_elem: logger.error("Could not find <Instance> in SharedConfig document") return rdma_ipv4_addr = getattrib(instance_elem, "rdmaIPv4Address") if not rdma_ipv4_addr: logger.error( "Could not find rdmaIPv4Address attribute on Instance element of SharedConfig.xml document") return rdma_mac_addr = getattrib(instance_elem, "rdmaMacAddress") if not rdma_mac_addr: logger.error( "Could not find rdmaMacAddress attribute on Instance element of SharedConfig.xml document") return # add colons to the MAC address (e.g. 00155D33FF1D -> # 00:15:5D:33:FF:1D) rdma_mac_addr = ':'.join([rdma_mac_addr[i:i+2] for i in range(0, len(rdma_mac_addr), 2)]) logger.info("Found RDMA details. IPv4={0} MAC={1}".format( rdma_ipv4_addr, rdma_mac_addr)) # Set up the RDMA device with collected informatino RDMADeviceHandler(rdma_ipv4_addr, rdma_mac_addr).start() logger.info("RDMA: device is set up") return
def mount_dvd(self, max_retry=6, chk_err=True, dvd_device=None, mount_point=None, sleep_time=5): if dvd_device is None: dvd_device = self.get_dvd_device() if mount_point is None: mount_point = conf.get_dvd_mount_point() if not os.path.isdir(mount_point): os.makedirs(mount_point) for retry in range(0, max_retry): retcode = self.mount(dvd_device, mount_point, option="-o ro -t udf", chk_err=False) if retcode == 0: logger.info("Successfully mounted DVD") return if retry < max_retry - 1: mountlist = shellutil.run_get_output("/sbin/mount")[1] existing = self.get_mount_point(mountlist, dvd_device) if existing is not None: logger.info("{0} is mounted at {1}", dvd_device, existing) return logger.warn("Mount DVD failed: retry={0}, ret={1}", retry, retcode) time.sleep(sleep_time) if chk_err: raise OSUtilError("Failed to mount DVD.")
def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=[]): """ Wrapper for subprocess.check_output. Execute 'cmd'. Returns return code and STDOUT, trapping expected exceptions. Reports exceptions to Error if chk_err parameter is True """ if log_cmd: logger.verbose(u"Command: [{0}]", cmd) try: output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) output = ustr(output, encoding='utf-8', errors="backslashreplace") except subprocess.CalledProcessError as e: output = ustr(e.output, encoding='utf-8', errors="backslashreplace") if chk_err: msg = u"Command: [{0}], " \ u"return code: [{1}], " \ u"result: [{2}]".format(cmd, e.returncode, output) if e.returncode in expected_errors: logger.info(msg) else: logger.error(msg) return e.returncode, output except Exception as e: if chk_err: logger.error(u"Command [{0}] raised unexpected exception: [{1}]" .format(cmd, ustr(e))) return -1, ustr(e) return 0, output
def process(self): try: RDMADeviceHandler.update_dat_conf(dapl_config_paths, self.ipv4_addr) skip_rdma_device = False retcode,out = shellutil.run_get_output("modinfo hv_network_direct") if retcode == 0: version = re.search("version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE) if version: v1 = int(version.groups(0)[0]) v2 = int(version.groups(0)[1]) if v1>4 or v1==4 and v2>0: logger.info("Skip setting /dev/hvnd_rdma on 4.1 or later") skip_rdma_device = True else: logger.info("RDMA: hv_network_direct driver version not present, assuming 4.0.x or older.") else: logger.warn("RDMA: failed to get module info on hv_network_direct.") if not skip_rdma_device: RDMADeviceHandler.wait_rdma_device( self.rdma_dev, self.device_check_timeout_sec, self.device_check_interval_sec) RDMADeviceHandler.write_rdma_config_to_device( self.rdma_dev, self.ipv4_addr, self.mac_addr) RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr) except Exception as e: logger.error("RDMA: device processing failed: {0}".format(e))
def get_first_if(self): """ Return the interface name, and ip addr of the first active non-loopback interface. """ iface='' expected=16 # how many devices should I expect... struct_size=40 # for 64bit the size is 40 bytes sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP) buff=array.array('B', b'\0' * (expected * struct_size)) param = struct.pack('iL', expected*struct_size, buff.buffer_info()[0]) ret = fcntl.ioctl(sock.fileno(), 0x8912, param) retsize=(struct.unpack('iL', ret)[0]) if retsize == (expected * struct_size): logger.warn(('SIOCGIFCONF returned more than {0} up ' 'network interfaces.'), expected) sock = buff.tostring() primary = bytearray(self.get_primary_interface(), encoding='utf-8') for i in range(0, struct_size * expected, struct_size): iface=sock[i:i+16].split(b'\0', 1)[0] if len(iface) == 0 or self.is_loopback(iface) or iface != primary: # test the next one if len(iface) != 0 and not self.disable_route_warning: logger.info('interface [{0}] skipped'.format(iface)) continue else: # use this one logger.info('interface [{0}] selected'.format(iface)) break return iface.decode('latin-1'), socket.inet_ntoa(sock[i+20:i+24])
def get_primary_interface(self): """ Get the name of the primary interface, which is the one with the default route attached to it; if there are multiple default routes, the primary has the lowest Metric. :return: the interface which has the default route """ # from linux/route.h RTF_GATEWAY = 0x02 DEFAULT_DEST = "00000000" hdr_iface = "Iface" hdr_dest = "Destination" hdr_flags = "Flags" hdr_metric = "Metric" idx_iface = -1 idx_dest = -1 idx_flags = -1 idx_metric = -1 primary = None primary_metric = None if not self.disable_route_warning: logger.info("Examine /proc/net/route for primary interface") with open('/proc/net/route') as routing_table: idx = 0 for header in filter(lambda h: len(h) > 0, routing_table.readline().strip(" \n").split("\t")): if header == hdr_iface: idx_iface = idx elif header == hdr_dest: idx_dest = idx elif header == hdr_flags: idx_flags = idx elif header == hdr_metric: idx_metric = idx idx = idx + 1 for entry in routing_table.readlines(): route = entry.strip(" \n").split("\t") if route[idx_dest] == DEFAULT_DEST and int(route[idx_flags]) & RTF_GATEWAY == RTF_GATEWAY: metric = int(route[idx_metric]) iface = route[idx_iface] if primary is None or metric < primary_metric: primary = iface primary_metric = metric if primary is None: primary = '' if not self.disable_route_warning: with open('/proc/net/route') as routing_table_fh: routing_table_text = routing_table_fh.read() logger.warn('Could not determine primary interface, ' 'please ensure /proc/net/route is correct') logger.warn('Contents of /proc/net/route:\n{0}'.format(routing_table_text)) logger.warn('Primary interface examination will retry silently') self.disable_route_warning = True else: logger.info('Primary interface is [{0}]'.format(primary)) self.disable_route_warning = False return primary
def mount_dvd(self, max_retry=6, chk_err=True, dvd_device=None, mount_point=None): if dvd_device is None: dvd_device = self.get_dvd_device() if mount_point is None: mount_point = conf.get_dvd_mount_point() mountlist = shellutil.run_get_output("mount")[1] existing = self.get_mount_point(mountlist, dvd_device) if existing is not None: #Already mounted logger.info("{0} is already mounted at {1}", dvd_device, existing) return if not os.path.isdir(mount_point): os.makedirs(mount_point) for retry in range(0, max_retry): retcode = self.mount(dvd_device, mount_point, option="-o ro -t udf,iso9660", chk_err=chk_err) if retcode == 0: logger.info("Successfully mounted dvd") return if retry < max_retry - 1: logger.warn("Mount dvd failed: retry={0}, ret={1}", retry, retcode) time.sleep(5) if chk_err: raise OSUtilError("Failed to mount dvd.")
def useradd(self, username, expiration=None, comment=None): """Create user account using tmsh Our policy is to create two accounts when booting a BIG-IP instance. The first account is the one that the user specified when they did the instance creation. The second one is the admin account that is, or should be, built in to the system. :param username: The username that you want to add to the system :param expiration: The expiration date to use. We do not use this value. :param comment: description of the account. We do not use this value. """ if self.get_userentry(username): logger.info("User {0} already exists, skip useradd", username) return None cmd = "/usr/bin/tmsh create auth user %s partition-access add { all-partitions { role admin } } shell bash" % (username) retcode, out = shellutil.run_get_output(cmd, log_cmd=True, chk_err=True) if retcode != 0: raise OSUtilError( "Failed to create user account:{0}, retcode:{1}, output:{2}".format(username, retcode, out) ) self._save_sys_config() return retcode
def _wait_until_mcpd_is_initialized(self): """Wait for mcpd to become available All configuration happens in mcpd so we need to wait that this is available before we go provisioning the system. I call this method at the first opportunity I have (during the DVD mounting call). This ensures that the rest of the provisioning does not need to wait for mcpd to be available unless it absolutely wants to. :return bool: Returns True upon success :raises OSUtilError: Raises exception if mcpd does not come up within roughly 50 minutes (100 * 30 seconds) """ for retries in range(1, 100): # Retry until mcpd completes startup: logger.info("Checking to see if mcpd is up") rc = shellutil.run("/usr/bin/tmsh -a show sys mcp-state field-fmt 2>/dev/null | grep phase | grep running", chk_err=False) if rc == 0: logger.info("mcpd is up!") break time.sleep(30) if rc is 0: return True raise OSUtilError( "mcpd hasn't completed initialization! Cannot proceed!" )
def run(self): #If provision is enabled, run default provision handler if conf.get_provision_enabled(): super(UbuntuProvisionHandler, self).run() return logger.info("run Ubuntu provision handler") provisioned = os.path.join(conf.get_lib_dir(), "provisioned") if os.path.isfile(provisioned): return logger.info("Waiting cloud-init to copy ovf-env.xml.") self.wait_for_ovfenv() protocol = self.protocol_util.get_protocol() self.report_not_ready("Provisioning", "Starting") logger.info("Sleep 15 seconds to prevent throttling") time.sleep(15) #Sleep to prevent throttling try: logger.info("Wait for ssh host key to be generated.") thumbprint = self.wait_for_ssh_host_key() fileutil.write_file(provisioned, "") logger.info("Finished provisioning") except ProvisionError as e: logger.error("Provision failed: {0}", e) self.report_not_ready("ProvisioningFailed", ustr(e)) self.report_event(ustr(e)) return self.report_ready(thumbprint) self.report_event("Provision succeed", is_success=True)
def change_partition_type(self, suppress_message, option_str): """ use sfdisk to change partition type. First try with --part-type; if fails, fall back to -c """ command_to_use = '--part-type' input = "sfdisk {0} {1} {2}".format(command_to_use, '-f' if suppress_message else '', option_str) err_code, output = shellutil.run_get_output(input, chk_err=False, log_cmd=True) # fall back to -c if err_code != 0: logger.info("sfdisk with --part-type failed [{0}], retrying with -c", err_code) command_to_use = '-c' input = "sfdisk {0} {1} {2}".format(command_to_use, '-f' if suppress_message else '', option_str) err_code, output = shellutil.run_get_output(input, log_cmd=True) if err_code == 0: logger.info('{0} succeeded', input) else: logger.error('{0} failed [{1}: {2}]', input, err_code, output) return err_code, output
def get_dhcp_lease_endpoint(self): """ OpenBSD has a sligthly different lease file format. """ endpoint = None pathglob = '/var/db/dhclient.leases.{}'.format(self.get_first_if()[0]) HEADER_LEASE = "lease" HEADER_OPTION = "option option-245" HEADER_EXPIRE = "expire" FOOTER_LEASE = "}" FORMAT_DATETIME = "%Y/%m/%d %H:%M:%S %Z" logger.info("looking for leases in path [{0}]".format(pathglob)) for lease_file in glob.glob(pathglob): leases = open(lease_file).read() if HEADER_OPTION in leases: cached_endpoint = None has_option_245 = False expired = True # assume expired for line in leases.splitlines(): if line.startswith(HEADER_LEASE): cached_endpoint = None has_option_245 = False expired = True elif HEADER_OPTION in line: try: ipaddr = line.split(" ")[-1].strip(";").split(":") cached_endpoint = \ ".".join(str(int(d, 16)) for d in ipaddr) has_option_245 = True except ValueError: logger.error("could not parse '{0}'".format(line)) elif HEADER_EXPIRE in line: if "never" in line: expired = False else: try: expire_string = line.split( " ", 4)[-1].strip(";") expire_date = datetime.datetime.strptime( expire_string, FORMAT_DATETIME) if expire_date > datetime.datetime.utcnow(): expired = False except ValueError: logger.error("could not parse expiry token " "'{0}'".format(line)) elif FOOTER_LEASE in line: logger.info("dhcp entry:{0}, 245:{1}, expired: {2}" .format(cached_endpoint, has_option_245, expired)) if not expired and cached_endpoint is not None and has_option_245: endpoint = cached_endpoint logger.info("found endpoint [{0}]".format(endpoint)) # we want to return the last valid entry, so # keep searching if endpoint is not None: logger.info("cached endpoint found [{0}]".format(endpoint)) else: logger.info("cached endpoint not found") return endpoint
def del_root_password(self): logger.info('del_root_password') ret, out = self._run_clish('set user admin password-hash *LOCK*') if ret != 0: raise OSUtilError("Failed to delete root password")
def run(self): # if provisioning is already done, return provisioned = os.path.join(conf.get_lib_dir(), "provisioned") if os.path.isfile(provisioned): logger.info("Provisioning already completed, skipping.") return thumbprint = None # If provision is not enabled, report ready and then return if not conf.get_provision_enabled(): logger.info("Provisioning is disabled, skipping.") else: logger.info("Running default provisioning handler") try: if not self.validate_cloud_init(is_expected=False): raise ProvisionError( "cloud-init appears to be running, " "this is not expected, cannot continue") logger.info("Copying ovf-env.xml") ovf_env = self.protocol_util.copy_ovf_env() self.protocol_util.get_protocol_by_file() self.report_not_ready("Provisioning", "Starting") logger.info("Starting provisioning") self.provision(ovf_env) thumbprint = self.reg_ssh_host_key() self.osutil.restart_ssh_service() self.report_event("Provision succeed", is_success=True) except (ProtocolError, ProvisionError) as e: self.report_not_ready("ProvisioningFailed", ustr(e)) self.report_event(ustr(e)) logger.error("Provisioning failed: {0}", ustr(e)) return # write out provisioned file and report Ready fileutil.write_file(provisioned, "") self.report_ready(thumbprint) logger.info("Provisioning complete")
def deploy_ssh_keypairs(self, ovfenv): for keypair in ovfenv.ssh_keypairs: logger.info("Deploy ssh key pairs.") self.osutil.deploy_ssh_keypair(ovfenv.username, keypair)
def deploy_ssh_pubkeys(self, ovfenv): for pubkey in ovfenv.ssh_pubkeys: logger.info("Deploy ssh public key.") self.osutil.deploy_ssh_pubkey(ovfenv.username, pubkey)
def run(self): """ This is the main loop which watches for agent and extension updates. """ logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT) # Launch monitoring threads from azurelinuxagent.ga.monitor import get_monitor_handler get_monitor_handler().run() from azurelinuxagent.ga.env import get_env_handler get_env_handler().run() from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state exthandlers_handler = get_exthandlers_handler() migrate_handler_state() try: send_event_time = datetime.utcnow() self._ensure_no_orphans() self._emit_restart_event() while self.running: if self._is_orphaned: logger.info("Goal state agent {0} was orphaned -- exiting", CURRENT_AGENT) break if self._upgrade_available(): if len(self.agents) > 0: logger.info( u"Agent {0} discovered {1} as an update and will exit", CURRENT_AGENT, self.agents[0].name) break utc_start = datetime.utcnow() last_etag = exthandlers_handler.last_etag exthandlers_handler.run() if last_etag != exthandlers_handler.last_etag: add_event(AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.ProcessGoalState, is_success=True, duration=elapsed_milliseconds(utc_start), log_event=True) time.sleep(GOAL_STATE_INTERVAL) except Exception as e: logger.warn(u"Agent {0} failed with exception: {1}", CURRENT_AGENT, ustr(e)) logger.warn(traceback.format_exc()) sys.exit(1) return self._shutdown() sys.exit(0) return
def run_latest(self, child_args=None): """ This method is called from the daemon to find and launch the most current, downloaded agent. Note: - Most events should be tagged to the launched agent (agent_version) """ if self.child_process is not None: raise Exception( "Illegal attempt to launch multiple goal state Agent processes" ) if self.signal_handler is None: self.signal_handler = signal.signal(signal.SIGTERM, self.forward_signal) latest_agent = self.get_latest_agent() if latest_agent is None: logger.info(u"Installed Agent {0} is the most current agent", CURRENT_AGENT) agent_cmd = "python -u {0} -run-exthandlers".format(sys.argv[0]) agent_dir = os.getcwd() agent_name = CURRENT_AGENT agent_version = CURRENT_VERSION else: logger.info(u"Determined Agent {0} to be the latest agent", latest_agent.name) agent_cmd = latest_agent.get_agent_cmd() agent_dir = latest_agent.get_agent_dir() agent_name = latest_agent.name agent_version = latest_agent.version if child_args is not None: agent_cmd = "{0} {1}".format(agent_cmd, child_args) try: # Launch the correct Python version for python-based agents cmds = textutil.safe_shlex_split(agent_cmd) if cmds[0].lower() == "python": cmds[0] = get_python_cmd() agent_cmd = " ".join(cmds) self._evaluate_agent_health(latest_agent) self.child_process = subprocess.Popen(cmds, cwd=agent_dir, stdout=sys.stdout, stderr=sys.stderr, env=os.environ) logger.verbose(u"Agent {0} launched with command '{1}'", agent_name, agent_cmd) # If the most current agent is the installed agent and update is enabled, # assume updates are likely available and poll every second. # This reduces the start-up impact of finding / launching agent updates on # fresh VMs. if latest_agent is None and conf.get_autoupdate_enabled(): poll_interval = 1 else: poll_interval = CHILD_POLL_INTERVAL ret = None start_time = time.time() while (time.time() - start_time) < CHILD_HEALTH_INTERVAL: time.sleep(poll_interval) ret = self.child_process.poll() if ret is not None: break if ret is None or ret <= 0: msg = u"Agent {0} launched with command '{1}' is successfully running".format( agent_name, agent_cmd) logger.info(msg) add_event(AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=True, message=msg) if ret is None: ret = self.child_process.wait() else: msg = u"Agent {0} launched with command '{1}' failed with return code: {2}".format( agent_name, agent_cmd, ret) logger.warn(msg) add_event(AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=False, message=msg) if ret is not None and ret > 0: msg = u"Agent {0} launched with command '{1}' returned code: {2}".format( agent_name, agent_cmd, ret) logger.warn(msg) if latest_agent is not None: latest_agent.mark_failure(is_fatal=True) except Exception as e: # Ignore child errors during termination if self.running: msg = u"Agent {0} launched with command '{1}' failed with exception: {2}".format( agent_name, agent_cmd, ustr(e)) logger.warn(msg) add_event(AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=False, message=msg) if latest_agent is not None: latest_agent.mark_failure(is_fatal=True) self.child_process = None return
def start(self): """ Start a thread in the background to process the RDMA tasks and returns. """ logger.info("RDMA: starting device processing in the background.") threading.Thread(target=self.process).start()
def restore_rules_files(self, rules_files=''): logger.info('restore_rules_files is ignored on GAiA')
def get_endpoint_from_leases_path(pathglob): """ Try to discover and decode the wireserver endpoint in the specified dhcp leases path. :param pathglob: The path containing dhcp lease files :return: The endpoint if available, otherwise None """ endpoint = None HEADER_LEASE = "lease" HEADER_OPTION = "option unknown-245" HEADER_DNS = "option domain-name-servers" HEADER_EXPIRE = "expire" FOOTER_LEASE = "}" FORMAT_DATETIME = "%Y/%m/%d %H:%M:%S" logger.info("looking for leases in path [{0}]".format(pathglob)) for lease_file in glob.glob(pathglob): leases = open(lease_file).read() if HEADER_OPTION in leases: cached_endpoint = None has_option_245 = False expired = True # assume expired for line in leases.splitlines(): if line.startswith(HEADER_LEASE): cached_endpoint = None has_option_245 = False expired = True elif HEADER_DNS in line: cached_endpoint = line.replace(HEADER_DNS, '').strip(" ;") elif HEADER_OPTION in line: has_option_245 = True elif HEADER_EXPIRE in line: if "never" in line: expired = False else: try: expire_string = line.split(" ", 4)[-1].strip(";") expire_date = datetime.datetime.strptime( expire_string, FORMAT_DATETIME) if expire_date > datetime.datetime.utcnow(): expired = False except: logger.error( "could not parse expiry token '{0}'". format(line)) elif FOOTER_LEASE in line: logger.info( "dhcp entry:{0}, 245:{1}, expired:{2}".format( cached_endpoint, has_option_245, expired)) if not expired and cached_endpoint is not None and has_option_245: endpoint = cached_endpoint logger.info( "found endpoint [{0}]".format(endpoint)) # we want to return the last valid entry, so # keep searching if endpoint is not None: logger.info("cached endpoint found [{0}]".format(endpoint)) else: logger.info("cached endpoint not found") return endpoint
def conf_sudoer(self, username, nopasswd=False, remove=False): logger.info('conf_sudoer is not supported on GAiA')
def install_driver(self): """ Install the KVP daemon and the appropriate RDMA driver package for the RDMA firmware. """ # Check and install the KVP deamon if it not running time.sleep(10) # give some time for the hv_hvp_daemon to start up. kvpd_running = RDMAHandler.is_kvp_daemon_running() logger.info('RDMA: kvp daemon running: %s' % kvpd_running) if not kvpd_running: self.check_or_install_kvp_daemon() time.sleep(10) # wait for post-install reboot or kvp to come up # Find out RDMA firmware version and see if the existing package needs # updating or if the package is missing altogether (and install it) fw_version = self.get_rdma_version() if not fw_version: raise Exception('Cannot determine RDMA firmware version') logger.info("RDMA: found firmware version: {0}".format(fw_version)) fw_version = self.get_int_rdma_version(fw_version) installed_pkg = self.get_rdma_package_info() if installed_pkg: logger.info( 'RDMA: driver package present: {0}'.format(installed_pkg)) if self.is_rdma_package_up_to_date(installed_pkg, fw_version): logger.info('RDMA: driver package is up-to-date') return else: logger.info('RDMA: driver package needs updating') self.update_rdma_package(fw_version) else: logger.info('RDMA: driver package is NOT installed') self.update_rdma_package(fw_version)
def allow_dhcp_broadcast(self): logger.info('allow_dhcp_broadcast is ignored on GAiA')
def update_rdma_package(self, fw_version): logger.info("RDMA: updating RDMA packages") self.refresh_repos() self.force_install_package(self.rdma_wrapper_package_name) self.install_rdma_drivers(fw_version)
def get_primary_interface(self): """ Get the name of the primary interface, which is the one with the default route attached to it; if there are multiple default routes, the primary has the lowest Metric. :return: the interface which has the default route """ # from linux/route.h RTF_GATEWAY = 0x02 DEFAULT_DEST = "00000000" hdr_iface = "Iface" hdr_dest = "Destination" hdr_flags = "Flags" hdr_metric = "Metric" idx_iface = -1 idx_dest = -1 idx_flags = -1 idx_metric = -1 primary = None primary_metric = None if not self.disable_route_warning: logger.info("Examine /proc/net/route for primary interface") with open('/proc/net/route') as routing_table: idx = 0 for header in filter( lambda h: len(h) > 0, routing_table.readline().strip(" \n").split("\t")): if header == hdr_iface: idx_iface = idx elif header == hdr_dest: idx_dest = idx elif header == hdr_flags: idx_flags = idx elif header == hdr_metric: idx_metric = idx idx = idx + 1 for entry in routing_table.readlines(): route = entry.strip(" \n").split("\t") if route[idx_dest] == DEFAULT_DEST and int( route[idx_flags]) & RTF_GATEWAY == RTF_GATEWAY: metric = int(route[idx_metric]) iface = route[idx_iface] if primary is None or metric < primary_metric: primary = iface primary_metric = metric if primary is None: primary = '' if not self.disable_route_warning: with open('/proc/net/route') as routing_table_fh: routing_table_text = routing_table_fh.read() logger.warn('Could not determine primary interface, ' 'please ensure /proc/net/route is correct') logger.warn('Contents of /proc/net/route:\n{0}'.format( routing_table_text)) logger.warn( 'Primary interface examination will retry silently') self.disable_route_warning = True else: logger.info('Primary interface is [{0}]'.format(primary)) self.disable_route_warning = False return primary
def _log_cgroup_warning(format_string, *args): message = format_string.format(*args) logger.info("[CGW] " + message) # log as INFO for now, in the future it should be logged as WARNING add_event(op=WALAEventOperation.CGroupsInfo, message=message, is_success=False, log_event=False)
def check_or_install_kvp_daemon(self): """Checks if kvp daemon package is installed, if not installs the package and reboots the machine. """ logger.info("RDMA: Checking kvp daemon packages.") kvp_pkgs = [self.hyper_v_package_name, self.hyper_v_package_name_new] for pkg in kvp_pkgs: logger.info("RDMA: Checking if package %s installed" % pkg) installed = self.is_package_installed(pkg) if installed: raise Exception( 'RDMA: package %s is installed, but the kvp daemon is not running' % pkg) kvp_pkg_to_install = self.hyper_v_package_name logger.info("RDMA: no kvp drivers installed, will install '%s'" % kvp_pkg_to_install) logger.info("RDMA: trying to install kvp package '%s'" % kvp_pkg_to_install) if self.install_package(kvp_pkg_to_install) != 0: raise Exception("RDMA: failed to install kvp daemon package '%s'" % kvp_pkg_to_install) logger.info("RDMA: package '%s' successfully installed" % kvp_pkg_to_install) logger.info("RDMA: Machine will now be rebooted.") self.reboot_system()
def mount_resource_disk(self, mount_point): device = self.osutil.device_for_ide_port(1) if device is None: raise ResourceDiskError("unable to detect disk topology") device = "/dev/{0}".format(device) partition = device + "1" mount_list = shellutil.run_get_output("mount")[1] existing = self.osutil.get_mount_point(mount_list, device) if existing: logger.info("Resource disk [{0}] is already mounted [{1}]", partition, existing) return existing fileutil.mkdir(mount_point, mode=0o755) logger.info("Examining partition table") ret = shellutil.run_get_output("parted {0} print".format(device)) if ret[0]: raise ResourceDiskError("Could not determine partition info for " "{0}: {1}".format(device, ret[1])) force_option = 'F' if self.fs == 'xfs': force_option = 'f' mkfs_string = "mkfs.{0} {1} -{2}".format(self.fs, partition, force_option) if "gpt" in ret[1]: logger.info("GPT detected, finding partitions") parts = [ x for x in ret[1].split("\n") if re.match("^\s*[0-9]+", x) ] logger.info("Found {0} GPT partition(s).", len(parts)) if len(parts) > 1: logger.info("Removing old GPT partitions") for i in range(1, len(parts) + 1): logger.info("Remove partition {0}", i) shellutil.run("parted {0} rm {1}".format(device, i)) logger.info("Creating new GPT partition") shellutil.run( "parted {0} mkpart primary 0% 100%".format(device)) logger.info("Format partition [{0}]", mkfs_string) shellutil.run(mkfs_string) else: logger.info("GPT not detected, determining filesystem") ret = self.change_partition_type(suppress_message=True, option_str="{0} 1".format(device)) ptype = ret[1].strip() if ptype == "7" and self.fs != "ntfs": logger.info("The partition is formatted with ntfs, updating " "partition type to 83") self.change_partition_type( suppress_message=False, option_str="{0} 1 83".format(device)) logger.info("Format partition [{0}]", mkfs_string) shellutil.run(mkfs_string) else: logger.info("The partition type is {0}", ptype) mount_options = conf.get_resourcedisk_mountoptions() mount_string = self.get_mount_string(mount_options, partition, mount_point) logger.info("Mount resource disk [{0}]", mount_string) ret = shellutil.run(mount_string, chk_err=False) if ret: # Some kernels seem to issue an async partition re-read after a # 'parted' command invocation. This causes mount to fail if the # partition re-read is not complete by the time mount is # attempted. Seen in CentOS 7.2. Force a sequential re-read of # the partition and try mounting. logger.warn("Failed to mount resource disk. " "Retry mounting after re-reading partition info.") if shellutil.run("sfdisk -R {0}".format(device), chk_err=False): shellutil.run("blockdev --rereadpt {0}".format(device), chk_err=False) ret = shellutil.run(mount_string, chk_err=False) if ret: logger.warn("Failed to mount resource disk. " "Attempting to format and retry mount.") shellutil.run(mkfs_string) ret = shellutil.run(mount_string) if ret: raise ResourceDiskError("Could not mount {0} " "after syncing partition table: " "{1}".format(partition, ret)) logger.info("Resource disk {0} is mounted at {1} with {2}", device, mount_point, self.fs) return mount_point
def __setup_azure_slice(): """ The agent creates "azure.slice" for use by extensions and the agent. The agent runs under "azure.slice" directly and each extension runs under its own slice ("Microsoft.CPlat.Extension.slice" in the example below). All the slices for extensions are grouped under "vmextensions.slice". Example: -.slice ├─user.slice ├─system.slice └─azure.slice ├─walinuxagent.service │ ├─5759 /usr/bin/python3 -u /usr/sbin/waagent -daemon │ └─5764 python3 -u bin/WALinuxAgent-2.2.53-py2.7.egg -run-exthandlers └─azure-vmextensions.slice └─Microsoft.CPlat.Extension.slice └─5894 /usr/bin/python3 /var/lib/waagent/Microsoft.CPlat.Extension-1.0.0.0/enable.py This method ensures that the "azure" and "vmextensions" slices are created. Setup should create those slices under /lib/systemd/system; but if they do not exist, __ensure_azure_slices_exist will create them. It also creates drop-in files to set the agent's Slice and CPUAccounting if they have not been set up in the agent's unit file. Lastly, the method also cleans up unit files left over from previous versions of the agent. """ # Older agents used to create this slice, but it was never used. Cleanup the file. CGroupConfigurator._Impl.__cleanup_unit_file("/etc/systemd/system/system-walinuxagent.extensions.slice") unit_file_install_path = systemd.get_unit_file_install_path() azure_slice = os.path.join(unit_file_install_path, AZURE_SLICE) vmextensions_slice = os.path.join(unit_file_install_path, _VMEXTENSIONS_SLICE) logcollector_slice = os.path.join(unit_file_install_path, LOGCOLLECTOR_SLICE) agent_unit_file = systemd.get_agent_unit_file() agent_drop_in_path = systemd.get_agent_drop_in_path() agent_drop_in_file_slice = os.path.join(agent_drop_in_path, _AGENT_DROP_IN_FILE_SLICE) agent_drop_in_file_cpu_accounting = os.path.join(agent_drop_in_path, _DROP_IN_FILE_CPU_ACCOUNTING) files_to_create = [] if not os.path.exists(azure_slice): files_to_create.append((azure_slice, _AZURE_SLICE_CONTENTS)) if not os.path.exists(vmextensions_slice): files_to_create.append((vmextensions_slice, _VMEXTENSIONS_SLICE_CONTENTS)) if not os.path.exists(logcollector_slice): slice_contents = _LOGCOLLECTOR_SLICE_CONTENTS_FMT.format(cpu_quota=_LOGCOLLECTOR_CPU_QUOTA, memory_limit=_LOGCOLLECTOR_MEMORY_LIMIT) files_to_create.append((logcollector_slice, slice_contents)) if fileutil.findre_in_file(agent_unit_file, r"Slice=") is not None: CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_slice) else: if not os.path.exists(agent_drop_in_file_slice): files_to_create.append((agent_drop_in_file_slice, _AGENT_DROP_IN_FILE_SLICE_CONTENTS)) if fileutil.findre_in_file(agent_unit_file, r"CPUAccounting=") is not None: CGroupConfigurator._Impl.__cleanup_unit_file(agent_drop_in_file_cpu_accounting) else: if not os.path.exists(agent_drop_in_file_cpu_accounting): files_to_create.append((agent_drop_in_file_cpu_accounting, _DROP_IN_FILE_CPU_ACCOUNTING_CONTENTS)) if len(files_to_create) > 0: # create the unit files, but if 1 fails remove all and return try: for path, contents in files_to_create: CGroupConfigurator._Impl.__create_unit_file(path, contents) except Exception as exception: _log_cgroup_warning("Failed to create unit files for the azure slice: {0}", ustr(exception)) for unit_file in files_to_create: CGroupConfigurator._Impl.__cleanup_unit_file(unit_file) return # reload the systemd configuration; the new slices will be used once the agent's service restarts try: logger.info("Executing systemctl daemon-reload...") shellutil.run_command(["systemctl", "daemon-reload"]) except Exception as exception: _log_cgroup_warning("daemon-reload failed (create azure slice): {0}", ustr(exception))
def run(self): if not conf.get_provision_enabled(): logger.info("Provisioning is disabled, skipping.") self.write_provisioned() self.report_ready() return try: utc_start = datetime.utcnow() thumbprint = None if self.is_provisioned(): logger.info("Provisioning already completed, skipping.") return logger.info("Running default provisioning handler") if not self.validate_cloud_init(is_expected=False): raise ProvisionError("cloud-init appears to be running, " "this is not expected, cannot continue") logger.info("Copying ovf-env.xml") ovf_env = self.protocol_util.copy_ovf_env() self.protocol_util.get_protocol_by_file() self.report_not_ready("Provisioning", "Starting") logger.info("Starting provisioning") self.provision(ovf_env) thumbprint = self.reg_ssh_host_key() self.osutil.restart_ssh_service() self.write_provisioned() self.report_event("Provisioning succeeded", is_success=True, duration=elapsed_milliseconds(utc_start)) self.report_event(self.create_guest_state_telemetry_messsage(), is_success=True, operation=WALAEventOperation.GuestState) self.report_ready(thumbprint) logger.info("Provisioning complete") except (ProtocolError, ProvisionError) as e: self.report_not_ready("ProvisioningFailed", ustr(e)) self.report_event(ustr(e)) logger.error("Provisioning failed: {0}", ustr(e)) return
def _log_cgroup_info(format_string, *args): message = format_string.format(*args) logger.info("[CGI] " + message) add_event(op=WALAEventOperation.CGroupsInfo, message=message)
def get_dhcp_lease_endpoint(self): # pylint: disable=R0912 """ OpenBSD has a sligthly different lease file format. """ endpoint = None pathglob = '/var/db/dhclient.leases.{}'.format(self.get_first_if()[0]) HEADER_LEASE = "lease" # pylint: disable=C0103 HEADER_OPTION = "option option-245" # pylint: disable=C0103 HEADER_EXPIRE = "expire" # pylint: disable=C0103 FOOTER_LEASE = "}" # pylint: disable=C0103 FORMAT_DATETIME = "%Y/%m/%d %H:%M:%S %Z" # pylint: disable=C0103 logger.info("looking for leases in path [{0}]".format(pathglob)) for lease_file in glob.glob(pathglob): # pylint: disable=R1702 leases = open(lease_file).read() if HEADER_OPTION in leases: cached_endpoint = None has_option_245 = False expired = True # assume expired for line in leases.splitlines(): if line.startswith(HEADER_LEASE): cached_endpoint = None has_option_245 = False expired = True elif HEADER_OPTION in line: try: ipaddr = line.split(" ")[-1].strip(";").split(":") cached_endpoint = \ ".".join(str(int(d, 16)) for d in ipaddr) has_option_245 = True except ValueError: logger.error("could not parse '{0}'".format(line)) elif HEADER_EXPIRE in line: if "never" in line: expired = False else: try: expire_string = line.split(" ", 4)[-1].strip(";") expire_date = datetime.datetime.strptime( expire_string, FORMAT_DATETIME) if expire_date > datetime.datetime.utcnow(): expired = False except ValueError: logger.error("could not parse expiry token " "'{0}'".format(line)) elif FOOTER_LEASE in line: logger.info( "dhcp entry:{0}, 245:{1}, expired: {2}".format( cached_endpoint, has_option_245, expired)) if not expired and cached_endpoint is not None and has_option_245: endpoint = cached_endpoint logger.info( "found endpoint [{0}]".format(endpoint)) # we want to return the last valid entry, so # keep searching if endpoint is not None: logger.info("cached endpoint found [{0}]".format(endpoint)) else: logger.info("cached endpoint not found") return endpoint
def mkfile(self, filename, nbytes): """ Create a non-sparse file of that size. Deletes and replaces existing file. To allow efficient execution, fallocate will be tried first. This includes ``os.posix_fallocate`` on Python 3.3+ (unix) and the ``fallocate`` command in the popular ``util-linux{,-ng}`` package. A dd fallback will be tried too. When size < 64M, perform single-pass dd. Otherwise do two-pass dd. """ if not isinstance(nbytes, int): nbytes = int(nbytes) if nbytes < 0: raise ValueError(nbytes) if os.path.isfile(filename): os.remove(filename) # If file system is xfs, use dd right away as we have been reported that # swap enabling fails in xfs fs when disk space is allocated with fallocate ret = 0 fn_sh = shellutil.quote((filename, )) if self.fs != 'xfs': # os.posix_fallocate if sys.version_info >= (3, 3): # Probable errors: # - OSError: Seen on Cygwin, libc notimpl? # - AttributeError: What if someone runs this under... with open(filename, 'w') as f: try: os.posix_fallocate(f.fileno(), 0, nbytes) return 0 except: # Not confident with this thing, just keep trying... pass # fallocate command ret = shellutil.run(u"umask 0077 && fallocate -l {0} {1}".format( nbytes, fn_sh)) if ret == 0: return ret logger.info("fallocate unsuccessful, falling back to dd") # dd fallback dd_maxbs = 64 * 1024**2 dd_cmd = "umask 0077 && dd if=/dev/zero bs={0} count={1} " \ "conv=notrunc of={2}" blocks = int(nbytes / dd_maxbs) if blocks > 0: ret = shellutil.run(dd_cmd.format(dd_maxbs, blocks, fn_sh)) << 8 remains = int(nbytes % dd_maxbs) if remains > 0: ret += shellutil.run(dd_cmd.format(remains, 1, fn_sh)) if ret == 0: logger.info("dd successful") else: logger.error("dd unsuccessful") return ret
def _remove_user(self, username): logger.info("Removing remote access user '{0}'", username) self._os_util.del_account(username)
def delete_user(self, username): self.os_util.del_account(username) logger.info("User deleted {0}".format(username))
def mount_resource_disk(self, mount_point): device = self.osutil.device_for_ide_port(1) if device is None: raise ResourceDiskError("unable to detect disk topology") device = "/dev/{0}".format(device) partition = device + "1" mount_list = shellutil.run_get_output("mount")[1] existing = self.osutil.get_mount_point(mount_list, device) if existing: logger.info("Resource disk [{0}] is already mounted [{1}]", partition, existing) return existing try: fileutil.mkdir(mount_point, mode=0o755) except OSError as ose: msg = "Failed to create mount point " \ "directory [{0}]: {1}".format(mount_point, ose) logger.error(msg) raise ResourceDiskError(msg=msg, inner=ose) logger.info("Examining partition table") ret = shellutil.run_get_output("parted {0} print".format(device)) if ret[0]: raise ResourceDiskError("Could not determine partition info for " "{0}: {1}".format(device, ret[1])) force_option = 'F' if self.fs == 'xfs': force_option = 'f' mkfs_string = "mkfs.{0} -{2} {1}".format( self.fs, partition, force_option) if "gpt" in ret[1]: logger.info("GPT detected, finding partitions") parts = [x for x in ret[1].split("\n") if re.match(r"^\s*[0-9]+", x)] logger.info("Found {0} GPT partition(s).", len(parts)) if len(parts) > 1: logger.info("Removing old GPT partitions") for i in range(1, len(parts) + 1): logger.info("Remove partition {0}", i) shellutil.run("parted {0} rm {1}".format(device, i)) logger.info("Creating new GPT partition") shellutil.run( "parted {0} mkpart primary 0% 100%".format(device)) logger.info("Format partition [{0}]", mkfs_string) shellutil.run(mkfs_string) else: logger.info("GPT not detected, determining filesystem") ret = self.change_partition_type( suppress_message=True, option_str="{0} 1 -n".format(device)) ptype = ret[1].strip() if ptype == "7" and self.fs != "ntfs": logger.info("The partition is formatted with ntfs, updating " "partition type to 83") self.change_partition_type( suppress_message=False, option_str="{0} 1 83".format(device)) self.reread_partition_table(device) logger.info("Format partition [{0}]", mkfs_string) shellutil.run(mkfs_string) else: logger.info("The partition type is {0}", ptype) mount_options = conf.get_resourcedisk_mountoptions() mount_string = self.get_mount_string(mount_options, partition, mount_point) attempts = 5 while not os.path.exists(partition) and attempts > 0: logger.info("Waiting for partition [{0}], {1} attempts remaining", partition, attempts) sleep(5) attempts -= 1 if not os.path.exists(partition): raise ResourceDiskError( "Partition was not created [{0}]".format(partition)) logger.info("Mount resource disk [{0}]", mount_string) ret, output = shellutil.run_get_output(mount_string, chk_err=False) # if the exit code is 32, then the resource disk can be already mounted if ret == 32 and output.find("is already mounted") != -1: logger.warn("Could not mount resource disk: {0}", output) elif ret != 0: # Some kernels seem to issue an async partition re-read after a # 'parted' command invocation. This causes mount to fail if the # partition re-read is not complete by the time mount is # attempted. Seen in CentOS 7.2. Force a sequential re-read of # the partition and try mounting. logger.warn("Failed to mount resource disk. " "Retry mounting after re-reading partition info.") self.reread_partition_table(device) ret, output = shellutil.run_get_output(mount_string, chk_err=False) if ret: logger.warn("Failed to mount resource disk. " "Attempting to format and retry mount. [{0}]", output) shellutil.run(mkfs_string) ret, output = shellutil.run_get_output(mount_string) if ret: raise ResourceDiskError("Could not mount {0} " "after syncing partition table: " "[{1}] {2}".format(partition, ret, output)) logger.info("Resource disk {0} is mounted at {1} with {2}", device, mount_point, self.fs) return mount_point
def mount_resource_disk(self, mount_point): device = self.osutil.device_for_ide_port(1) if device is None: raise ResourceDiskError("unable to detect disk topology") logger.info('Resource disk device {0} found.', device) # 2. Get partition device = "/dev/{0}".format(device) partition = device + "1" logger.info('Resource disk partition {0} found.', partition) # 3. Mount partition mount_list = shellutil.run_get_output("mount")[1] existing = self.osutil.get_mount_point(mount_list, device) if existing: logger.info("Resource disk [{0}] is already mounted [{1}]", partition, existing) return existing try: fileutil.mkdir(mount_point, mode=0o755) except OSError as ose: msg = "Failed to create mount point " \ "directory [{0}]: {1}".format(mount_point, ose) logger.error(msg) raise ResourceDiskError(msg=msg, inner=ose) force_option = 'F' if self.fs == 'xfs': force_option = 'f' mkfs_string = "mkfs.{0} -{2} {1}".format(self.fs, partition, force_option) # Compare to the Default mount_resource_disk, we don't check for GPT that is not supported on OpenWRT ret = self.change_partition_type(suppress_message=True, option_str="{0} 1 -n".format(device)) ptype = ret[1].strip() if ptype == "7" and self.fs != "ntfs": logger.info("The partition is formatted with ntfs, updating " "partition type to 83") self.change_partition_type(suppress_message=False, option_str="{0} 1 83".format(device)) self.reread_partition_table(device) logger.info("Format partition [{0}]", mkfs_string) shellutil.run(mkfs_string) else: logger.info("The partition type is {0}", ptype) mount_options = conf.get_resourcedisk_mountoptions() mount_string = self.get_mount_string(mount_options, partition, mount_point) attempts = 5 while not os.path.exists(partition) and attempts > 0: logger.info("Waiting for partition [{0}], {1} attempts remaining", partition, attempts) sleep(5) attempts -= 1 if not os.path.exists(partition): raise ResourceDiskError( "Partition was not created [{0}]".format(partition)) if os.path.ismount(mount_point): logger.warn("Disk is already mounted on {0}", mount_point) else: # Some kernels seem to issue an async partition re-read after a # command invocation. This causes mount to fail if the # partition re-read is not complete by the time mount is # attempted. Seen in CentOS 7.2. Force a sequential re-read of # the partition and try mounting. logger.info("Mounting after re-reading partition info.") self.reread_partition_table(device) logger.info("Mount resource disk [{0}]", mount_string) ret, output = shellutil.run_get_output(mount_string) if ret: logger.warn( "Failed to mount resource disk. " "Attempting to format and retry mount. [{0}]", output) shellutil.run(mkfs_string) ret, output = shellutil.run_get_output(mount_string) if ret: raise ResourceDiskError("Could not mount {0} " "after syncing partition table: " "[{1}] {2}".format( partition, ret, output)) logger.info("Resource disk {0} is mounted at {1} with {2}", device, mount_point, self.fs) return mount_point
def install_driver(self): """Install the appropriate driver package for the RDMA firmware""" fw_version = RDMAHandler.get_rdma_version() if not fw_version: error_msg = 'RDMA: Could not determine firmware version. ' error_msg += 'Therefore, no driver will be installed.' logger.error(error_msg) return zypper_install = 'zypper -n in %s' zypper_remove = 'zypper -n rm %s' zypper_search = 'zypper se -s %s' package_name = 'msft-rdma-kmp-default' cmd = zypper_search % package_name status, repo_package_info = shellutil.run_get_output(cmd) driver_package_versions = [] driver_package_installed = False for entry in repo_package_info.split('\n'): if package_name in entry: sections = entry.split('|') if len(sections) < 4: error_msg = 'RDMA: Unexpected output from"%s": "%s"' logger.error(error_msg % (cmd, entry)) continue installed = sections[0].strip() version = sections[3].strip() driver_package_versions.append(version) if fw_version in version and installed == 'i': info_msg = 'RDMA: Matching driver package "%s-%s" ' info_msg += 'is already installed, nothing to do.' logger.info(info_msg % (package_name, version)) return True if installed == 'i': driver_package_installed = True # If we get here the driver package is installed but the # version doesn't match or no package is installed requires_reboot = False if driver_package_installed: # Unloading the particular driver with rmmod does not work # We have to reboot after the new driver is installed if self.is_driver_loaded(): info_msg = 'RDMA: Currently loaded driver does not match the ' info_msg += 'firmware implementation, reboot will be required.' logger.info(info_msg) requires_reboot = True logger.info("RDMA: removing package %s" % package_name) cmd = zypper_remove % package_name shellutil.run(cmd) logger.info("RDMA: removed package %s" % package_name) logger.info("RDMA: looking for fw version %s in packages" % fw_version) for entry in driver_package_versions: if not fw_version in version: logger.info("Package '%s' is not a match." % entry) else: logger.info("Package '%s' is a match. Installing." % entry) complete_name = '%s-%s' % (package_name, version) cmd = zypper_install % complete_name result = shellutil.run(cmd) if result: error_msg = 'RDMA: Failed install of package "%s" ' error_msg += 'from available repositories.' logger.error(error_msg % complete_name) msg = 'RDMA: Successfully installed "%s" from ' msg += 'configured repositories' logger.info(msg % complete_name) self.load_driver_module() if requires_reboot: self.reboot_system() return True else: logger.info("RDMA: No suitable match in repos. Trying local.") local_packages = glob.glob('/opt/microsoft/rdma/*.rpm') for local_package in local_packages: logger.info("Examining: %s" % local_package) if local_package.endswith('.src.rpm'): continue if (package_name in local_package and fw_version in local_package): logger.info("RDMA: Installing: %s" % local_package) cmd = zypper_install % local_package result = shellutil.run(cmd) if result: error_msg = 'RDMA: Failed install of package "%s" ' error_msg += 'from local package cache' logger.error(error_msg % local_package) break msg = 'RDMA: Successfully installed "%s" from ' msg += 'local package cache' logger.info(msg % (local_package)) self.load_driver_module() if requires_reboot: self.reboot_system() return True else: error_msg = 'Unable to find driver package that matches ' error_msg += 'RDMA firmware version "%s"' % fw_version logger.error(error_msg) return
def _upgrade_available(self, base_version=CURRENT_VERSION): # Ignore new agents if updating is disabled if not conf.get_autoupdate_enabled(): return False now = time.time() if self.last_attempt_time is not None: next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency( ) else: next_attempt_time = now if next_attempt_time > now: return False family = conf.get_autoupdate_gafamily() logger.info("Checking for agent family {0} updates", family) self.last_attempt_time = now try: protocol = self.protocol_util.get_protocol() manifest_list, etag = protocol.get_vmagent_manifests() except Exception as e: msg = u"Exception retrieving agent manifests: {0}".format(ustr(e)) logger.warn(msg) add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False manifests = [m for m in manifest_list.vmAgentManifests \ if m.family == family and len(m.versionsManifestUris) > 0] if len(manifests) == 0: logger.info(u"Incarnation {0} has no agent family {1} updates", etag, family) return False try: pkg_list = protocol.get_vmagent_pkgs(manifests[0]) except ProtocolError as e: msg = u"Incarnation {0} failed to get {1} package list: " \ u"{2}".format( etag, family, ustr(e)) logger.warn(msg) add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False # Set the agents to those available for download at least as current # as the existing agent and remove from disk any agent no longer # reported to the VM. # Note: # The code leaves on disk available, but blacklisted, agents so as to # preserve the state. Otherwise, those agents could be again # downloaded and inappropriately retried. host = None if protocol and protocol.client: host = protocol.client.get_host_plugin() self._set_agents( [GuestAgent(pkg=pkg, host=host) for pkg in pkg_list.versions]) self._purge_agents() self._filter_blacklisted_agents() # Return True if agents more recent than the current are available return len(self.agents) > 0 and self.agents[0].version > base_version
def run(self): if self.detect_scvmm_env(): logger.info("Exiting") time.sleep(300) sys.exit(0)
def mount_resource_disk(self, mount_point): fs = self.fs if fs != 'ufs': raise ResourceDiskError( "Unsupported filesystem type:{0}, only ufs is supported.". format(fs)) # 1. Detect device err, output = shellutil.run_get_output('gpart list') if err: raise ResourceDiskError( "Unable to detect resource disk device:{0}".format(output)) disks = self.parse_gpart_list(output) device = self.osutil.device_for_ide_port(1) if device is None or not device in disks: # fallback logic to find device err, output = shellutil.run_get_output( 'camcontrol periphlist 2:1:0') if err: # try again on "3:1:0" err, output = shellutil.run_get_output( 'camcontrol periphlist 3:1:0') if err: raise ResourceDiskError( "Unable to detect resource disk device:{0}".format( output)) # 'da1: generation: 4 index: 1 status: MORE\npass2: generation: 4 index: 2 status: LAST\n' for line in output.split('\n'): index = line.find(':') if index > 0: geom_name = line[:index] if geom_name in disks: device = geom_name break if not device: raise ResourceDiskError("Unable to detect resource disk device.") logger.info('Resource disk device {0} found.', device) # 2. Detect partition partition_table_type = disks[device] if partition_table_type == 'MBR': provider_name = device + 's1' elif partition_table_type == 'GPT': provider_name = device + 'p2' else: raise ResourceDiskError( "Unsupported partition table type:{0}".format(output)) err, output = shellutil.run_get_output( 'gpart show -p {0}'.format(device)) if err or output.find(provider_name) == -1: raise ResourceDiskError("Resource disk partition not found.") partition = '/dev/' + provider_name logger.info('Resource disk partition {0} found.', partition) # 3. Mount partition mount_list = shellutil.run_get_output("mount")[1] existing = self.osutil.get_mount_point(mount_list, partition) if existing: logger.info("Resource disk {0} is already mounted", partition) return existing fileutil.mkdir(mount_point, mode=0o755) mount_cmd = 'mount -t {0} {1} {2}'.format(fs, partition, mount_point) err = shellutil.run(mount_cmd, chk_err=False) if err: logger.info('Creating {0} filesystem on partition {1}'.format( fs, partition)) err, output = shellutil.run_get_output( 'newfs -U {0}'.format(partition)) if err: raise ResourceDiskError( "Failed to create new filesystem on partition {0}, error:{1}" .format(partition, output)) err, output = shellutil.run_get_output(mount_cmd, chk_err=False) if err: raise ResourceDiskError( "Failed to mount partition {0}, error {1}".format( partition, output)) logger.info( "Resource disk partition {0} is mounted at {1} with fstype {2}", partition, mount_point, fs) return mount_point