def ready(**kwargs):
    """
    Wait until zypper has no locks.

    Runs ``zypper locks`` repeatedly, echoing its stdout to stdout and its
    stderr to stderr, until the command exits with status 0.

    kwargs:
        sleep (int): seconds to pause after a non-zero exit (default 6)
    """
    defaults = {'sleep': 6}
    defaults.update(kwargs)
    zypper_cmd = 'zypper locks'
    sleep = defaults['sleep']

    while True:
        log.debug("Running {}".format(zypper_cmd))
        proc = Popen(zypper_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        # communicate() drains both pipes while waiting for the child.
        # Calling wait() first can deadlock once the child fills a pipe.
        stdout, stderr = proc.communicate()
        for line in stdout.splitlines(True):
            print(_convert_out(line))
        for line in stderr.splitlines(True):
            sys.stderr.write(_convert_out(line))

        if proc.returncode == 0:
            log.warning("Unlocked")
            return

        log.warning("Locked - Waiting {} seconds".format(sleep))
        time.sleep(sleep)
def cmd(**kwargs):
    """
    Retry commands with a backoff delay.

    kwargs:
        cmd (str): shell command to run (required; KeyError if missing)
        retry (int): maximum number of attempts (default 3)
        sleep (int): base delay in seconds; the pause after a failed
                     attempt is ``sleep * attempt`` (default 6)

    Returns None as soon as one attempt exits with status 0.

    raises:
        RuntimeError: when no attempt succeeded
    """
    defaults = {'retry': 3, 'sleep': 6}
    defaults.update(kwargs)
    _cmd = defaults['cmd']
    retry = defaults['retry']
    sleep = defaults['sleep']

    for attempt in range(1, retry + 1):
        log.debug("Running {} on attempt {}".format(_cmd, attempt))
        proc = Popen(_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        # communicate() drains both pipes while waiting for the child.
        # Calling wait() first can deadlock once the child fills a pipe.
        stdout, stderr = proc.communicate()
        for line in stdout.splitlines(True):
            print(_convert_out(line))
        for line in stderr.splitlines(True):
            sys.stderr.write(_convert_out(line))

        if proc.returncode == 0:
            return

        # No point in sleeping after the final attempt.
        if attempt < retry:
            wait_time = sleep * attempt
            log.warning("Waiting {} seconds to try {} again".format(wait_time, _cmd))
            time.sleep(wait_time)

    log.warning("command {} failed {} retries".format(_cmd, retry))
    raise RuntimeError("cmd {} failed {} retries".format(_cmd, retry))
def _process_map():
    """
    List the processes that lsof reports with deleted files.

    Pipes ``lsof`` through an awk filter that keeps lines matching
    "deleted" and prints columns 1, 2 and 4. Each row with three non-empty
    fields becomes a dict with the keys 'name', 'pid' and 'user'.
    """
    lsof = Popen(shlex.split('lsof '), stdout=PIPE)
    # pylint: disable=line-too-long
    awk_args = shlex.split(
        "awk 'BEGIN {IGNORECASE = 1} /deleted/ {print $1 \" \" $2 \" \" $4}'")
    awk = Popen(awk_args, stdin=lsof.stdout, stdout=PIPE, stderr=PIPE)
    # Drop our copy of the read end so only awk holds lsof's output pipe.
    lsof.stdout.close()
    output, _ = awk.communicate()
    output = _convert_out(output)

    found = []
    for row in output.split('\n'):
        fields = row.split(' ')
        if not (fields[0] and fields[1] and fields[2]):
            continue
        name = fields[0]
        if name == 'httpd-pre':
            # lsof 'nicely' abbreviates httpd-prefork to httpd-pre
            name = 'httpd-prefork'
        found.append({'name': name, 'pid': fields[1], 'user': fields[2]})
    return found
def _hwinfo(self, device=None):
    """
    Collect the "key: value" lines hwinfo prints for one disk.

    args:
        device (str): short name of device (sda, sdb..)
    return:
        dict: values with double quotes removed, keyed by the text before
              the colon. A parsable "Capacity" line is stored as the
              human-readable size plus an extra 'Bytes' entry.
    """
    parsed = {}
    hwinfo_path = self._which('hwinfo')
    query = "{} --disk --only /dev/{}".format(hwinfo_path, device)
    hwinfo = Popen(query, stdout=PIPE, stderr=PIPE, shell=True)
    for raw in hwinfo.stdout:
        # NOTE(review): the pattern requires exactly one leading space
        # before the key - confirm against real hwinfo output.
        hit = re.match(" ([^:]+): (.*)", _convert_out(raw))
        if not hit:
            continue
        key, value = hit.group(1), hit.group(2)

        if key == "Capacity":
            size = re.match(r"(\d+ \w+) \((\d+) bytes\)", value)
            if size:
                parsed[key] = size.group(1)
                parsed['Bytes'] = size.group(2)
            continue

        if key == 'Device File':
            # Keep only the first space-separated token.
            value = value.split(' ')[0]
        parsed[key] = value.replace('"', '')
    return parsed
def _osd(self, device, ids):
    """
    Search for Ceph Data and Journal partitions.

    Runs ``sgdisk -i <partition_id> <device>`` for every entry in ids and
    looks for a known Ceph type GUID on the "Partition GUID code:" line.

    args:
        device (str): device handed to sgdisk
        ids (iterable): partition numbers to inspect
    return:
        bool: True as soon as one partition carries a Ceph GUID, else False
    """
    log.debug("Checking partitions {} on device {}".format(ids, device))
    guuid_table = {'data': "45B0969E-9B03-4F30-B4C6-B4B80CEFF106",
                   'journal': "4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D",
                   'db': "30CD0809-C2B2-499C-8879-2D6B78529876",
                   'wal': "5CE17FCE-4087-4169-B7FF-056CC58473F9",
                   'osd_lockbox': "FB3AABF9-D25F-47CC-BF5E-721D1816496B",
                   'luks_journal': "45B0969E-9B03-4F30-B4C6-35865CEFF106",
                   'luks_wal': "86A32090-3647-40B9-BBBD-38D8C573AA86",
                   'luks_db': "166418DA-C469-4022-ADF4-B30AFD37F176",
                   'plain_wal': "306E8683-4FE2-4330-B7C0-00A917C16966",
                   'plain_db': "93B0052D-02D9-4D8A-A43B-33A3EE4DFBC3"}
    sgdisk_path = self._which('sgdisk')
    for partition_id in ids:
        cmd = "{} -i {} {}".format(sgdisk_path, partition_id, device)
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        # Drain both pipes together so a chatty stderr cannot stall the
        # child while we are still reading stdout.
        stdout, stderr = proc.communicate()
        for line in _convert_out(stdout).split('\n'):
            if not line.startswith("Partition GUID code:"):
                continue
            # Only the GUID values matter here; the labels are not used.
            if any(code in line for code in guuid_table.values()):
                log.debug('Found signs that {} belongs to ceph'.format(device))
                return True
        # Decode before logging so the log shows text rather than bytes.
        for line in stderr.splitlines():
            log.error(_convert_out(line))
    log.debug("No signs of ceph found on {}. Skipping..".format(device))
    return False
def gen_secret():
    """
    Generate a valid keyring secret for Ceph.

    The result is the base64 encoding of a little-endian header (short 1,
    int current time, int 0, short key length) followed by 16 random bytes.
    """
    secret = os.urandom(16)
    created = int(time.time())
    blob = struct.pack('<hiih', 1, created, 0, len(secret)) + secret
    return _convert_out(base64.b64encode(blob))
def list_():
    """
    Find all rbd images.

    Runs ``rados lspools`` and, for every pool printed, ``rbd -p <pool> ls``.
    Returns a dict mapping pool name to the list of image names. Pools for
    which rbd prints nothing get no entry.
    """
    images = {}
    pools = Popen(['rados', 'lspools'], stdout=PIPE, stderr=PIPE)
    for raw_pool in pools.stdout:
        pool = _convert_out(raw_pool).rstrip('\n')
        rbd_ls = Popen(['/usr/bin/rbd', '-p', pool, 'ls'],
                       stdout=PIPE, stderr=PIPE)
        for raw_image in rbd_ls.stdout:
            image = _convert_out(raw_image)
            images.setdefault(pool, []).append(image.rstrip('\n'))
    return images
def get_ceph_disks_yml(**kwargs):
    """
    Generates yml representation of Ceph filestores on a given node.

    Returns something like:
    {"ceph":
        {"storage":
            {"osds":
                {"/dev/foo":
                    {"format": "filestore", "journal": "/dev/bar"}}}}}

    Returns None when ``ceph-disk list`` wrote anything to stderr.
    kwargs is accepted but not used.
    """
    ceph_disk_list = Popen(
        "PYTHONWARNINGS=ignore ceph-disk list --format=json",
        stdout=PIPE, stderr=PIPE, shell=True)
    out, err = ceph_disk_list.communicate()
    out = _convert_out(out)
    err = _convert_out(err)

    ceph_disks = {"ceph": {"storage": {"osds": {}}}}

    # Failed `ceph-disk list`
    if err:
        return None

    # [ { 'path': '/dev/foo', 'partitions': [ {...}, ... ], ... }, ... ]
    # The partitions list has all the goodies.
    for out_dict in json.loads(out):
        # Grab the path (ie. /dev/foo).
        path = out_dict.get("path")

        # Paranoia: check to make sure we have a path and a "partitions"
        # entry. The key used to be misspelled "paritions", which made this
        # check always fail and the result always empty.
        if not path or "partitions" not in out_dict:
            continue

        for part_dict in out_dict["partitions"]:
            # We only care to process OSD "data" partitions.
            if part_dict.get("type") != "data":
                continue

            # Determine if we're dealing with filestore or bluestore.
            osd_type = _get_osd_type(part_dict)
            if osd_type == "filestore":
                _append_fs_to_ceph_disk(ceph_disks, path, part_dict)
            elif osd_type == "bluestore":
                _append_bs_to_ceph_disk(ceph_disks, path, part_dict)
            else:
                log.warning(("Unable to engulf OSD at {}. Unsupported "
                             "type. Skipping.".format(path)))

    return ceph_disks
def _lshw(self):
    """
    Parse lshw output into dictionary.

    return:
        dict: one entry per <node> that has a logicalname (except
              /dev/cdrom), keyed by that logicalname. Each value is a dict
              with 'Capacity', 'Model' and 'Serial ID' (when present in the
              XML) plus 'Device File' and 'Driver'. Empty when lshw prints
              nothing at all.
    raises:
        Exception: lshw produced no stdout but did write to stderr
    """
    results = {}
    lshw_path = self._which('lshw')
    cmd = "{} -class disk -xml".format(lshw_path)
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = proc.communicate()

    if not stdout:
        if stderr:
            err_msg = "Something went wrong during 'lshw' execution"
            log.info(err_msg)
            raise Exception(err_msg)
        # Nothing on either stream: there is no XML to parse. Previously
        # this fell through and crashed on the unbound `data` variable.
        return results

    data = et.fromstring(_convert_out(stdout))
    attributes = {'size': 'Capacity',
                  'product': 'Model',
                  'serial': 'Serial ID'}

    # If find('node')? could this potentially go wrong?
    for node in data.findall('node'):
        logicalname = node.find('logicalname')
        if logicalname is None:
            log.info('No logicalname found. Cannot identiy that disk.')
            continue
        if logicalname.text == '/dev/cdrom':
            continue

        ident = logicalname.text
        if isinstance(ident, list):
            ident = ident[0]

        disk_description = {}
        for key, attr in six.iteritems(attributes):
            element = node.find(key)
            if element is None:
                continue
            if key == 'size':
                # Is the MB/GB/TB suffix important enough to add checking for it?
                disk_description[attr] = str(int(element.text) / 1000000000)
            else:
                disk_description[attr] = element.text
        disk_description['Device File'] = self._udevadm(ident)
        disk_description['Driver'] = self._find_driver()
        results[ident] = disk_description
    return results
def _query_disktype(self, device, raid_ctrl, base):
    """
    Query smartctl for more concise information on the disk type.

    args:
        device (str): short form of device (sda, sdb)
        raid_ctrl (dict): raid controller info; 'controller_name' is read
        base: passed to self._is_rotational() for the legacy fallback
    return:
        str: '0' when smartctl reports "Solid State Device" as the
             rotation rate, otherwise '1'. When no bus id is found or the
             smartctl query fails, the result of self._is_rotational(base)
             is returned instead.
    """
    smartctl_path = self._which('smartctl')
    bus_id = self._return_device_bus_id(device)
    if not bus_id:
        log.warning(('Could not find bus_id for {}. Falling back to legacy '
                     'detection mode'.format(device)))
        return self._is_rotational(base)

    try:
        cmd = "{} -i /dev/{} -d {},{}".format(smartctl_path,
                                              device,
                                              raid_ctrl['controller_name'],
                                              bus_id)
        proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        # communicate() drains both pipes while waiting for the child.
        # Calling wait() first can deadlock once the child fills a pipe.
        stdout, stderr = proc.communicate()
        if proc.returncode != 0:
            log.info("{}\nrc: {} - {}".format(cmd, proc.returncode,
                                              _convert_out(stderr)))
            raise RuntimeError("Smartctl failure")

        for line in _convert_out(stdout).split('\n'):
            # ADD PARSING HERE TO DETECT FAILURE
            if "A mandatory SMART command failed" in line:
                log.warning("Something went wrong during smartctl query")
            match = re.match("([^:]+): (.*)", line)
            if match and match.group(1) == "Rotation Rate":
                if re.match(r"^\s+ Solid State Device", match.group(2)):
                    return '0'
        return '1'
    # Exception instead of a bare except, so that KeyboardInterrupt and
    # SystemExit are no longer swallowed by the fallback.
    # pylint: disable=broad-except
    except Exception:
        # If something fails, fall back to the default detection mode
        log.warning(('Something went wrong during smartctl query for '
                     'device {}. Falling back to legacy detection '
                     'mode'.format(device)))
        return self._is_rotational(base)
def _hw_raid_ctrl_detection(self):
    """
    Ask lspci (filtered through ``grep -i raid``) which RAID controller,
    if any, the machine has.

    return:
        (dict): 'controller_name' and 'raidtype'. Both are None unless an
                output line contains one of the known controller
                signatures, in which case raidtype is 'hardware'.
    """
    # (substring to look for, controller name to report). The first hit
    # wins, so the order matters: 'hprr' must be tried before 'hpt'.
    signatures = (
        ('megaraid', 'megaraid'),
        ('areca', 'areca'),
        ('arcmsr', 'areca'),
        ('3ware', '3ware'),
        ('hprr', 'hprr'),
        ('hpt', 'hpt'),
        ('cciss', 'cciss'),
        ('aacraid', 'aacraid'),
    )
    info = {'controller_name': None}
    lspci_path = self._which('lspci')
    cmd = "{} -vv | grep -i raid".format(lspci_path)
    # Verify that proc.stdout actually gives something
    # Or set default to None.
    # TODO: See if other places are also infected
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    for raw in proc.stdout:
        lowered = _convert_out(raw).lower()
        controller = next(
            (name for needle, name in signatures if needle in lowered), None)
        info['controller_name'] = controller
        if controller:
            info['raidtype'] = 'hardware'
            log.info('Found raidctrl: {}'.format(controller))
            return info

    # Reaching this point means no line matched a known controller.
    info['raidtype'] = None
    log.info("No raidctrl found")
    return info
def _get_disk_id(partition): """ Return the disk id of a partition/device, or the original partition/device if the disk id is not available. """ disk_id_cmd = Popen("find -L /dev/disk/by-id -samefile " + partition + r" \( -name ata* -o -name nvme* \)", stdout=PIPE, stderr=PIPE, shell=True) # pylint: disable=unused-variable out, err = disk_id_cmd.communicate() # We should only ever have one entry that we return. if out: out = _convert_out(out) return out.rstrip() return partition
def _udevadm(self, device):
    """
    Return the path provided by udevadm.

    Runs ``udevadm info <device>`` and returns "/dev/" plus the second
    whitespace-separated field of the first output line containing
    'by-id'. Falls back to returning device unchanged when no such line
    exists.

    raises:
        Exception: udevadm produced no stdout but did write to stderr

    TODO: would `readlink -f` have been simpler here?
    """
    cmd = "udevadm info {}".format(device)
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = proc.communicate()
    if stdout:
        # communicate() returns one blob, so decode it and split it into
        # lines. Iterating the blob directly yields single bytes or
        # characters, which never contain 'by-id'.
        for line in _convert_out(stdout).split('\n'):
            if 'by-id' not in line:
                continue
            fields = line.split()
            if len(fields) > 1:
                return "/dev/" + fields[1]
    elif stderr:
        err_msg = "Something went wrong during 'udevadm' execution"
        log.info(err_msg)
        raise Exception(err_msg)
    return device
def _updates_needed(self):
    """
    Refresh, then run apt-check and report whether any update is pending.

    apt-check writes its result to stderr as ';'-separated counts:
    (1) Regular updates
    (2) Security updates
    Returns True if any count is greater than zero.
    """
    self._refresh()
    apt_check = "/usr/lib/update-notifier/apt-check"
    # pylint: disable=unused-variable
    checker = Popen(apt_check, stdout=PIPE, stderr=PIPE, shell=True)
    counts = _convert_out(checker.communicate()[1])
    if any(int(count) > 0 for count in counts.split(";")):
        log.info('Update Needed')
        return True
    log.info('No Update Needed')
    return False
def _get_partition_size(partition):
    """
    Return the size reported by ``blockdev --getsize64`` for partition,
    formatted by _convert_size. Returns "0B" when the output cannot be
    parsed as an integer.
    """
    query = Popen("blockdev --getsize64 {}".format(partition),
                  stdout=PIPE, stderr=PIPE, shell=True)
    raw_size = query.communicate()[0]
    try:
        return _convert_size(int(_convert_out(raw_size)))
    except ValueError:
        return "0B"
def zypper_ps(role, lsof_map):
    """
    Append the services zypper reports as needing a restart for role.

    Each line of ``zypper ps -sss`` is cut at the first '@'. If what is
    left is one of the process names registered for role, a
    {'name': <service>} entry is appended to lsof_map, which is modified
    in place and also returned.
    """
    assert role
    zypper = Popen(shlex.split('zypper ps -sss'), stdout=PIPE)
    output = _convert_out(zypper.communicate()[0])

    # This is the module-level dict itself, not a copy, so the two entries
    # below are written into it.
    role_services = processes
    # adding instead of overwriting, eh?
    # radosgw is ceph-radosgw in zypper ps.
    role_services['rgw'] = ['ceph-radosgw', 'radosgw', 'rgw']
    # ganesha is called nfs-ganesha
    role_services['ganesha'] = ['ganesha.nfsd', 'rpcbind',
                                'rpc.statd', 'nfs-ganesha']

    for entry in output.split('\n'):
        service = entry.partition('@')[0]
        if service in role_services[role]:
            lsof_map.append({'name': service})
    return lsof_map
def _return_device_bus_id(self, device):
    """
    Tries to get the BUS_ID for a device.
    Used to query S.M.A.R.T with -d <raidctrl>,<busid>

    Scans lsscsi output for the first line mentioning device that starts
    with a bracketed, colon-separated address and returns its
    second-to-last field.

    args:
        device (str): shortname for device (sda, sdb)
    return:
        str: bus_id of device, or None if it could not be determined
    """
    lsscsi_path = self._which('lsscsi')
    proc = Popen(lsscsi_path, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, _ = proc.communicate()
    for line in _convert_out(stdout).split('\n'):
        if device not in line:
            continue
        match = re.match(r'\[(.*?)\]', line)
        if match is None:
            # Line mentions the device but has no leading [..] address.
            continue
        address = match.group(1).split(":")
        if len(address) >= 2:
            # try to be less stupid here
            # is [0:0:ID:0] a fixed format?
            # check on other machines
            return address[-2]
    # format() belongs on the message. It used to be called on the return
    # value of log.warning(), which is None.
    log.warning("Could not retrieve bus_id for {}".format(device))
    return None
def get_keyring(**kwargs):
    """
    Retrieve a keyring via `ceph auth get`. Pass key=NAME_OF_KEY,
    e.g.: use key=client.admin to get the client admin key.

    Returns the stdout of `ceph auth get` (the complete keyring, suitable
    for use in a ceph keyring file), or None when no key was passed or the
    command printed nothing.

    This needs to be run on a minion with a suitable ceph.conf and client
    admin keyring, in order for `ceph auth get` to be able to talk to the
    cluster.
    """
    try:
        key_name = kwargs["key"]
    except KeyError:
        return None

    auth_get = Popen("ceph auth get " + key_name,
                     stdout=PIPE, stderr=PIPE, shell=True)
    keyring = _convert_out(auth_get.communicate()[0])
    return keyring or None
def _run_radosgw_admin_to_json(argv, filename):
    """
    Run argv, write its stdout to filename and log every stderr line.
    """
    proc = Popen(argv, stdout=PIPE, stderr=PIPE)
    # communicate() drains both pipes together. Reading stdout to EOF
    # before touching stderr can stall the child.
    stdout, stderr = proc.communicate()
    with open(filename, "w") as _json:
        _json.write(_convert_out(stdout))
    for line in stderr.splitlines(True):
        log.info("stderr: {}".format(_convert_out(line)))


def add_users(pathname="/srv/salt/ceph/rgw/cache",
              jinja="/srv/salt/ceph/rgw/files/users.j2"):
    """
    Write each user to its own file.

    Renders the jinja template and, for every user of every realm in it:
    * creates the RGW user via ``radosgw-admin user create`` when the uid
      is not among the existing users, saving the command output to
      <pathname>/user.<uid>.json
    * otherwise saves ``radosgw-admin user info`` to that file if the
      file does not exist yet

    Returns None. Does nothing when the template renders to None or has
    no 'realm' key.

    raises:
        ValueError: a user entry lacks 'uid' or 'name'
    """
    conf_users = __salt__['slsutil.renderer'](jinja)
    log.debug("users rendered: {}".format(conf_users))

    if conf_users is None or 'realm' not in conf_users:
        return

    for realm in conf_users['realm']:
        # Get the existing users.
        existing_users = users(realm)

        for user in conf_users['realm'][realm]:
            if 'uid' not in user or 'name' not in user:
                raise ValueError('ERROR: please specify both uid and name')

            filename = "{}/user.{}.json".format(pathname, user['uid'])

            if user['uid'] not in existing_users:
                # Create the RGW user if it does not exist. The command is
                # built as an argument list so that values containing
                # spaces (e.g. display names) stay in one argument.
                argv = ['radosgw-admin', 'user', 'create',
                        "--uid={}".format(user['uid']),
                        "--display-name={}".format(user['name']),
                        "--rgw-realm={}".format(realm)]
                if 'email' in user:
                    argv.append("--email={}".format(user['email']))
                if user.get('system'):
                    argv.append("--system")
                if 'access_key' in user:
                    argv.append("--access-key={}".format(user['access_key']))
                if 'secret' in user:
                    argv.append("--secret={}".format(user['secret']))

                # Run the create command exactly once. A second run would
                # target an already existing user and overwrite the JSON
                # file with that run's output.
                _run_radosgw_admin_to_json(argv, filename)
            elif not os.path.exists(filename):
                # Create the JSON file if it does not exist. This happens
                # when the RGW user was manually created beforehand.
                argv = ['radosgw-admin', 'user', 'info',
                        '--uid', user['uid'],
                        '--rgw-realm', realm]
                _run_radosgw_admin_to_json(argv, filename)