def run(self):
    """Stop gluster profiling for a volume and wait until it is off.

    Reads the volume id from ``self.parameters['Volume.vol_id']``, runs
    ``gluster volume profile <name> stop`` and then polls
    ``gluster volume profile <name> info`` every 5 seconds, for at most
    24 attempts, until that command exits with return code 1.

    Returns:
        True, after the volume has been saved with ``profiling_enabled``
        set to "no".

    Raises:
        AtomExecutionFailedError: if the stop command exits non-zero or
            the poll loop runs out of attempts.
    """
    volume = NS.gluster.objects.Volume(
        vol_id=self.parameters['Volume.vol_id']
    ).load()

    stop_cmd = cmd_utils.Command(
        "gluster volume profile %s stop" % volume.name
    )
    out, err, rc = stop_cmd.run()
    if rc != 0:
        raise AtomExecutionFailedError(
            "Error while disabling profiling "
            "for volume: %s in cluster: %s. Error: %s" %
            (volume.name, NS.tendrl_context.integration_id, err)
        )

    for _attempt in range(24):
        out, err, rc = cmd_utils.Command(
            "gluster volume profile %s info" % volume.name
        ).run()
        if rc == 1:
            break
        time.sleep(5)
    else:
        # Every attempt was used up without seeing return code 1.
        raise AtomExecutionFailedError(
            "Could not disable for volume: %s "
            "under cluster: %s. Timed out" %
            (volume.name, NS.tendrl_context.integration_id)
        )

    volume.profiling_enabled = "no"
    volume.save()
    return True
def _enable_disable_volume_profiling(self):
    """Apply the cluster profiling flag once, then derive the cluster state.

    While the node's ``first_sync_done`` key is unset/"no"/"", profiling
    is started or stopped on each volume according to
    ``cluster.volume_profiling_flag``. After that (on every call) the
    cluster's ``volume_profiling_state`` is recomputed from the per-volume
    ``profiling_enabled`` values and the cluster object is saved.
    """
    integration_id = NS.tendrl_context.integration_id
    cluster = NS.tendrl.objects.Cluster(
        integration_id=integration_id
    ).load()
    volumes = NS.gluster.objects.Volume().load_all() or []

    # Enable / disable based on the cluster flag volume_profiling_flag
    # should be done only once, during the first sync. Later the
    # volume_profiling_state is set from the individual volume values.
    first_sync_done = etcd_utils.read(
        "/clusters/%s/nodes/%s/NodeContext/first_sync_done" %
        (integration_id, NS.node_context.node_id)
    ).value
    if first_sync_done in [None, "no", ""]:
        failed_vols = []
        if cluster.volume_profiling_flag == "enable":
            for vol in volumes:
                if vol.profiling_enabled != "yes":
                    out, err, rc = cmd_utils.Command(
                        "gluster volume profile %s start" % vol.name
                    ).run()
                    if (err or rc != 0) and "already started" in err:
                        failed_vols.append(vol.name)
            if failed_vols:
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {"message": "Profiling already "
                                "enabled for volumes: %s" %
                                str(failed_vols)}
                )
            cluster.volume_profiling_state = "enabled"
        if cluster.volume_profiling_flag == "disable":
            for vol in volumes:
                if vol.profiling_enabled != "no":
                    out, err, rc = cmd_utils.Command(
                        "gluster volume profile %s stop" % vol.name
                    ).run()
                    if (err or rc != 0) and "not started" in err:
                        failed_vols.append(vol.name)
            if failed_vols:
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {"message": "Profiling not "
                                "enabled for volumes: %s" %
                                str(failed_vols)}
                )
            cluster.volume_profiling_state = "disabled"

    total = len(volumes)
    enabled_total = sum(
        1 for vol in volumes if vol.profiling_enabled == "yes"
    )
    if enabled_total == 0:
        cluster.volume_profiling_state = "disabled"
    elif enabled_total == total:
        cluster.volume_profiling_state = "enabled"
    elif enabled_total < total:
        cluster.volume_profiling_state = "mixed"
    cluster.save()
def run(self):
    """Stop and disable every service listed in ``Services[]``.

    Services whose ``Service`` object reports ``running`` as false are
    skipped with a debug log. For the others ``systemctl stop`` and then
    ``systemctl disable`` are executed.

    Returns:
        True when all services were processed, False as soon as one
        systemctl command exits with a non-zero return code.
    """
    super(StopServices, self).run()
    for service in self.parameters['Services[]']:
        srv = NS.tendrl.objects.Service(service=service)
        if not srv.running:
            logger.log(
                "debug",
                NS.publisher_id,
                {"message": "Service %s not running on "
                            "node %s" % (service, NS.node_context.fqdn)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
            )
            continue

        for action in ("stop", "disable"):
            # Command.run() yields (out, err, rc), as at every other
            # call site in this file. The previous code unpacked it as
            # (err, out, rc) and so tested stdout for errors. The return
            # code is used as the failure signal because stderr alone is
            # not a reliable indicator for systemctl.
            out, err, rc = cmd_utils.Command(
                "systemctl %s %s" % (action, service)
            ).run()
            if rc != 0:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Could not %s %s service. Error: %s" %
                                (action, service, err)},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                )
                return False
    return True
def get_raw_reference():
    """Collect the ``ls -l`` entries of every directory under /dev/disk/.

    Returns:
        dict mapping each sub-directory name of ``/dev/disk/`` to a list
        holding, per listed entry, the text from the 9th whitespace
        separated column onwards (the name column of ``ls -l``). A
        directory whose listing produced error output maps to an empty
        list and the error is published as a debug message.
    """
    base_path = '/dev/disk/'
    raw_reference = {}
    for path in os.listdir(base_path):
        raw_reference[path] = []
        cmd = cmd_utils.Command("ls -l %s" % (base_path + path))
        out, err, rc = cmd.run()
        if err:
            Event(
                Message(priority="debug",
                        publisher=NS.publisher_id,
                        payload={"message": err}))
            continue
        out = out.encode('utf8')
        # The first line of the listing is skipped, as before.
        for line in out.split('\n')[1:]:
            # ls pads its columns with a variable number of spaces, so
            # split on whitespace runs instead of single spaces;
            # otherwise the name column shifts for narrow values.
            fields = line.split(None, 8)
            raw_reference[path].append(fields[-1] if fields else '')
    return raw_reference
def _getNodeMemory(self):
    '''Return the total memory and total swap read from /proc/meminfo.

    returns structure
    {"TotalSize": "<MemTotal value>", "SwapTotal": "<SwapTotal value>"}

    Values are the raw text after the colon (stripped). A value is ''
    when the command produced no output or the label is missing.
    '''
    cmd = cmd_utils.Command("cat /proc/meminfo")
    out, err, rc = cmd.run(
        tendrl_ns.config.data['tendrl_ansible_exec_file'])

    # Look the values up by label; the position of a line inside
    # /proc/meminfo is not fixed, so indexing by line number is fragile.
    fields = {}
    for line in str(out).split('\n'):
        label, sep, value = line.partition(':')
        if sep:
            fields[label.strip()] = value.strip()

    return {
        'TotalSize': fields.get('MemTotal', ''),
        'SwapTotal': fields.get('SwapTotal', '')
    }
def get_lvs():
    """Return the logical volumes reported by ``lvm vgs``, keyed by path.

    Each output row is parsed into a dict of ``LVM2_*`` fields. Rows
    whose ``LVM2_LV_ATTR`` starts with 't' are keyed by
    ``<vg_name>/<lv_name>``, all others by the real path of
    ``LVM2_LV_PATH``.

    Returns:
        dict of parsed rows, ``None`` when the command exits non-zero.
        Parsing problems are logged at debug level and whatever was
        collected so far is returned.
    """
    _lvm_cmd = ("lvm vgs --unquoted --noheading --nameprefixes "
                "--separator $ --nosuffix --units m -o lv_uuid,"
                "lv_name,data_percent,pool_lv,lv_attr,lv_size,"
                "lv_path,lv_metadata_size,metadata_percent,vg_name")
    out, err, rc = cmd_utils.Command(_lvm_cmd, True).run()
    if rc != 0:
        logger.log("debug", NS.publisher_id, {"message": str(err)})
        return None

    lvs = {}
    if str(out) == '':
        return lvs
    try:
        # Parse every row first so a malformed row aborts before any
        # entry is stored.
        rows = [
            dict(pair.split('=') for pair in row.strip().split('$'))
            for row in out.split('\n')
        ]
        for lv in rows:
            if lv['LVM2_LV_ATTR'][0] == 't':
                key = "%s/%s" % (lv['LVM2_VG_NAME'], lv['LVM2_LV_NAME'])
            else:
                key = os.path.realpath(lv['LVM2_LV_PATH'])
            lvs[key] = lv
    except (ValueError, KeyError) as ex:
        # KeyError: an attribute name changed in the lvm output.
        # ValueError: the output format could not be parsed.
        logger.log("debug", NS.publisher_id, {"message": str(ex)})
    return lvs
def _setup_gluster_native_message_reciever(self):
    """Start glustereventsd and register this receiver as a webhook.

    Returns:
        False when glustereventsd reports no status and the start
        attempt did not succeed; True otherwise. A failing
        ``webhook-add`` is only logged ("info" if the webhook already
        exists, "error" otherwise) and does not change the result.
    """
    start_message, started = svc.Service("glustereventsd").start()
    eventsd_status = svc_stat.ServiceStatus("glustereventsd")
    if not eventsd_status.status() and not started:
        logger.log(
            "error",
            NS.publisher_id,
            {"message": "glustereventsd could not be started: %s" %
                        start_message}
        )
        return False

    url = "http://{0}:{1}{2}".format(self.host, str(self.port), self.path)
    out, err, rc = cmd_utils.Command(
        'gluster-eventsapi webhook-add %s' % url
    ).run()
    if rc != 0:
        if "Webhook already exists" in err:
            severity = "info"
        else:
            severity = "error"
        logger.log(
            severity,
            NS.publisher_id,
            {"message": "could not add webhook for glustereventsd. "
                        "{0}: {1}".format(severity, err)}
        )
    return True
def _execute_service_command(self, argument):
    """Run ``systemctl <argument> <self.name>.service``.

    ``argument`` is either a single string or a tuple of strings; a
    tuple is joined with spaces. Returns whatever ``Command.run()``
    returns.
    """
    if isinstance(argument, tuple):
        verb = " ".join(argument)
    else:
        verb = argument
    service_cmd = "systemctl " + verb + " %s.service" % self.name
    return cmd_utils.Command(service_cmd).run()
def get_lvs():
    """Return the logical volumes reported by ``lvm vgs``, keyed by path.

    Each output row is parsed into a dict of ``LVM2_*`` fields. Rows
    whose ``LVM2_LV_ATTR`` starts with 't' are keyed by
    ``<vg_name>/<lv_name>``, all others by the real path of
    ``LVM2_LV_PATH``.

    Returns:
        dict of parsed rows (empty when the command printed nothing),
        or ``None`` when the command exits non-zero. Output that cannot
        be parsed is reported as a debug event instead of raising.
    """
    _lvm_cmd = ("lvm vgs --unquoted --noheading --nameprefixes "
                "--separator $ --nosuffix --units m -o lv_uuid,"
                "lv_name,data_percent,pool_lv,lv_attr,lv_size,"
                "lv_path,lv_metadata_size,metadata_percent,vg_name")
    cmd = cmd_utils.Command(_lvm_cmd, True)
    out, err, rc = cmd.run()
    if rc != 0:
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={"message": str(err)}))
        return None

    d = {}
    # An empty output would otherwise be parsed as one row with a single
    # empty field, which dict() rejects with ValueError.
    if str(out) == '':
        return d
    try:
        rows = [
            dict(pair.split('=') for pair in row.strip().split('$'))
            for row in out.split('\n')
        ]
        for lv in rows:
            if lv['LVM2_LV_ATTR'][0] == 't':
                k = "%s/%s" % (lv['LVM2_VG_NAME'], lv['LVM2_LV_NAME'])
            else:
                k = os.path.realpath(lv['LVM2_LV_PATH'])
            d[k] = lv
    except (ValueError, KeyError) as ex:
        # KeyError: an expected LVM2_* attribute is missing.
        # ValueError: a row is not made of key=value pairs.
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={"message": str(ex)}))
    return d
def _run(self):
    """Sync loop: store the monitor key once, then heartbeat until stopped.

    Publishes a "running" event, makes sure ``clusters/<id>/_mon_key``
    exists (fetching it with ``ceph auth get mon.`` when missing), and
    then, until ``self._complete`` is set, sleeps ``sync_interval``
    seconds, marks the sync as in progress, processes a heartbeat and
    records the sync as done on the cluster object. Publishes a
    "complete" event on exit.
    """
    integration_id = NS.tendrl_context.integration_id
    class_name = self.__class__.__name__
    Event(
        Message(priority="info",
                publisher=NS.publisher_id,
                payload={"message": "%s running" % class_name}))

    # Check if the monitor key exists; if not, sync it.
    try:
        NS._int.client.read("clusters/%s/_mon_key" % integration_id)
    except etcd.EtcdKeyNotFound:
        out, err, rc = cmd_utils.Command(
            "ceph auth get mon. --cluster %s" %
            NS.tendrl_context.cluster_name
        ).run()
        if rc != 0:
            Event(
                Message(priority="debug",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Couldn't get monitor key. Error:%s" % err
                        }))
        elif out:
            # The secret is taken from the right-hand side of the
            # " = " on the second output line.
            key_line = out.split('\n')[1].strip()
            mon_sec = key_line.split(' = ')[1].strip()
            NS._int.wclient.write(
                "clusters/%s/_mon_key" % integration_id, mon_sec)

    while not self._complete.is_set():
        gevent.sleep(int(NS.config.data.get("sync_interval", 10)))
        try:
            NS._int.wclient.write(
                "clusters/%s/sync_status" % integration_id,
                "in_progress",
                prevExist=False)
        except (etcd.EtcdAlreadyExist, etcd.EtcdCompareFailed):
            # The key is already present; nothing to do.
            pass

        self.on_heartbeat(ceph.heartbeat(NS.tendrl_context.cluster_id))

        _cluster = NS.tendrl.objects.Cluster(
            integration_id=NS.tendrl_context.integration_id)
        if _cluster.exists():
            _cluster.sync_status = "done"
            _cluster.last_sync = str(now())
            _cluster.save()

    Event(
        Message(priority="info",
                publisher=NS.publisher_id,
                payload={"message": "%s complete" % class_name}))
def test_find_pid():
    """Feed live ``systemctl show sshd.service`` output to ``_find_pid``."""
    # Install a minimal global NS namespace for the code under test.
    namespace = maps.NamedDict()
    setattr(__builtin__, "NS", namespace)
    namespace.publisher_id = "node_agent"
    namespace["config"] = maps.NamedDict()
    namespace.config["data"] = maps.NamedDict(
        logging_socket_path="test/path")
    namespace.node_context = maps.NamedDict()
    namespace.node_context.node_id = 1

    out, err, rc = cmd_utils.Command("systemctl show sshd.service").run()
    sshd_status._find_pid(out)
def sync_cluster_status(volumes, sync_ttl):
    """Derive the cluster health and persist it in ``GlobalDetails``.

    The cluster is unhealthy when its current job is a failed
    ``ImportCluster``, when any volume state contains 'down' or
    'partial', or when a peer listed by ``gluster pool list`` is not
    'Connected'. A health change relative to the stored status is
    emitted as a ``cluster_health_status`` event.

    Args:
        volumes: collection of volumes; its length is stored as
            ``vol_count`` and it is passed to ``_derive_volume_states``.
        sync_ttl: ttl handed to ``GlobalDetails.save``.
    """
    integration_id = NS.tendrl_context.integration_id
    is_healthy = True
    degraded_count = 0

    # A failed import cluster flow marks the cluster as unhealthy.
    current_job = NS.tendrl.objects.Cluster(
        integration_id=integration_id
    ).load().current_job
    if current_job.get('job_name', '') == "ImportCluster" and \
            current_job.get('status', '') == "failed":
        is_healthy = False

    # Status contribution of the volumes.
    if len(volumes) > 0:
        for vol_id, state in _derive_volume_states(volumes).iteritems():
            if 'down' in state or 'partial' in state:
                is_healthy = False
            if 'degraded' in state:
                degraded_count += 1

    # Status contribution of the peers.
    out, err, rc = cmd_utils.Command('gluster pool list', True).run()
    peer_count = 0
    if not err:
        all_connected = True
        # Every line after the first (header) line counts as one peer.
        for peer_line in out.split('\n')[1:]:
            peer_count += 1
            columns = peer_line.split('\t')
            if len(columns) > 2 and columns[2].strip() != 'Connected':
                all_connected = False
        if not all_connected:
            is_healthy = False

    old_status = NS.gluster.objects.GlobalDetails().load().status or \
        'unhealthy'
    curr_status = 'healthy' if is_healthy else 'unhealthy'
    if curr_status != old_status:
        msg = ("Health status of cluster: %s "
               "changed from %s to %s") % (NS.tendrl_context.integration_id,
                                           old_status, curr_status)
        instance = "cluster_%s" % NS.tendrl_context.integration_id
        if curr_status == 'unhealthy':
            severity = 'WARNING'
        else:
            severity = 'INFO'
        event_utils.emit_event("cluster_health_status", curr_status, msg,
                               instance, severity)

    # Persist the cluster status.
    NS.gluster.objects.GlobalDetails(
        status='healthy' if is_healthy else 'unhealthy',
        peer_count=peer_count,
        vol_count=len(volumes),
        volume_up_degraded=degraded_count
    ).save(ttl=sync_ttl)
def test_run():
    """Exercise ``Command.run`` with AnsibleRunner patched two ways."""
    # Install a minimal global NS namespace for the code under test.
    namespace = maps.NamedDict()
    setattr(__builtin__, "NS", namespace)
    namespace.publisher_id = "node_agent"
    namespace["config"] = maps.NamedDict()
    namespace.config["data"] = maps.NamedDict(
        logging_socket_path="test/path")
    namespace.node_context = maps.NamedDict()
    namespace.node_context.node_id = 1

    cmd_obj = cmd_utils.Command("ls -a")

    # With the runner class replaced, run() is expected to raise
    # AnsibleModuleNotFound.
    runner_patch = patch.object(ansible_module_runner, 'AnsibleRunner',
                                ansible)
    with runner_patch:
        with pytest.raises(ansible_module_runner.AnsibleModuleNotFound):
            cmd_obj.run()

    # With only AnsibleRunner.run replaced, run() must complete.
    run_patch = patch.object(ansible_module_runner.AnsibleRunner, 'run',
                             run)
    with run_patch:
        cmd_obj.run()
def _cleanup_gluster_native_message_reciever(self):
    """Remove this receiver's webhook from glustereventsd.

    A failing ``webhook-del`` is only logged ("info" when the webhook
    does not exist, "error" otherwise). Always returns True.
    """
    url = "http://{0}:{1}{2}".format(self.host, str(self.port), self.path)
    out, err, rc = cmd_utils.Command(
        'gluster-eventsapi webhook-del %s' % url
    ).run()
    if rc != 0:
        if "Webhook does not exists" in err:
            severity = "info"
        else:
            severity = "error"
        logger.log(
            severity,
            NS.publisher_id,
            {"message": "could not delete webhook from glustereventsd. "
                        "{0}: {1}".format(severity, err)}
        )
    return True
def get_node_disks():
    """Return the node's disks with their partitions attached.

    Starts from ``get_disk_details()`` and, when neither that call nor
    ``hwinfo --partition`` reported an error, parses every
    blank-line-separated hwinfo block into a partition record. A record
    whose "Parent ID" is a key of ``disks_map`` is stored under
    ``disks[<disk_id>]["partitions"][<partition_name>]``.

    Returns:
        The ``disks`` dict from ``get_disk_details()``, with partitions
        filled in where possible.
    """
    # hwinfo label -> record key; value stored with leading blanks removed
    plain_fields = {
        "Unique ID": "hardware_id",
        "Parent ID": "parent_hardware_id",
        "SysFS ID": "sysfs_id",
        "Hardware Class": "hardware_class",
        "Device Files": "device_files",
        "Config Status": "config_status",
    }

    disks, disks_map, err = get_disk_details()
    if err:
        return disks
    out, err, rc = cmd_utils.Command('hwinfo --partition').run()
    if err:
        return disks

    for partitions in out.split('\n\n'):
        devlist = {
            "hardware_id": "",
            "parent_hardware_id": "",
            "sysfs_id": "",
            "hardware_class": "",
            "model": "",
            "partition_name": "",
            "device_files": "",
            "config_status": "",
        }
        for partition in partitions.split('\n'):
            # Split on the FIRST colon only: values such as by-path
            # device files contain colons themselves and were
            # previously cut off at their first inner colon.
            label, _sep, value = partition.partition(':')
            label = label.strip()
            value = value.lstrip()
            if label in plain_fields:
                devlist[plain_fields[label]] = value
            elif label == "Model":
                devlist["model"] = value.replace('"', "")
            elif label == "Device File":
                # Only the first space-separated token is the name.
                devlist["partition_name"] = value.split(" ")[0]

        # Attach the partition only if its parent id belongs to one of
        # the collected disks.
        parent_id = devlist["parent_hardware_id"]
        if parent_id in disks_map:
            part_name = devlist["partition_name"]
            disks[disks_map[parent_id]]["partitions"][part_name] = devlist
    return disks
def _getNodeCpu(self):
    '''Return selected CPU properties parsed from ``lscpu``.

    returns structure
    {"Architecture":   "architecture",
     "CpuOpMode":      "cpuopmode",
     "CPUs":           "cpus",
     "VendorId":       "vendorid",
     "ModelName":      "modelname",
     "CPUFamily":      "cpufamily",
     "Model":          "Model",
     "CoresPerSocket": "corespersocket"}

    Every value is the stripped text after the colon of the matching
    lscpu line, or '' when the command printed nothing or the line is
    absent.
    '''
    # result key -> lscpu label. Values are looked up by label because
    # the line order of lscpu output is not guaranteed.
    labels = (
        ('Architecture', 'Architecture'),
        ('CpuOpMode', 'CPU op-mode(s)'),
        ('CPUs', 'CPU(s)'),
        ('VendorId', 'Vendor ID'),
        ('ModelName', 'Model name'),
        ('CPUFamily', 'CPU family'),
        ('Model', 'Model'),
        ('CoresPerSocket', 'Core(s) per socket'),
    )
    cmd = cmd_utils.Command("lscpu")
    out, err, rc = cmd.run(
        tendrl_ns.config.data['tendrl_ansible_exec_file'])

    fields = {}
    for line in str(out).split('\n'):
        label, sep, value = line.partition(':')
        if sep:
            # Keep the first occurrence of a label.
            fields.setdefault(label.strip(), value.strip())

    cpuinfo = {}
    for key, label in labels:
        cpuinfo[key] = fields.get(label, '')
    return cpuinfo
def find_status():
    """Find the status of the sshd service via its process id.

    The pid is extracted from ``systemctl show sshd.service``; the
    process' listening connections bound to 0.0.0.0 give the port.

    input:
        (No input required)
    output:
        ({"name": "", "port": "", "status": ""}, err)
        ``err`` is the command's error output, or a message explaining
        why the fields could not be filled. Any non-empty ``err`` is
        also published as a debug event.
    """
    sshd = {"name": "", "port": "", "status": ""}
    out, err, rc = cmd_utils.Command("systemctl show sshd.service").run()

    if not err:
        pid = _find_pid(out)
        if pid == 0:
            err = "sshd service is not running"
        else:
            proc = psutil.Process(pid)
            listening = []
            for con in proc.connections():
                if con.status == psutil.CONN_LISTEN and \
                        con.laddr[0] == "0.0.0.0":
                    listening.append(con)
            if listening:
                first = listening[0]
                sshd["name"] = proc.name()
                sshd["port"] = int(first.laddr[1])
                sshd["status"] = first.status
            else:
                err = "Unable to find ssh port number"

    # Every failure path leaves a non-empty err; report it once here.
    if err:
        Event(
            Message(priority="debug",
                    publisher="commons",
                    payload={"message": err}))
    return sshd, err
def get_brick_source_and_mount(brick_path):
    """Return the filesystem source and mount point of a brick path.

    Runs ``df --output=source,target`` on the part of ``brick_path``
    after its last ':' and splits the last output line on whitespace.
    The two fields correspond to the "Filesystem" and "Mounted on"
    columns of df, e.g.
    "/dev/mapper/tendrlMyBrick4_vg-tendrlMyBrick4_lv" and
    "/tendrl_gluster_bricks/MyBrick4_mount".

    Returns:
        The whitespace-split fields of the last output line, or
        ``(None, None)`` when df exits non-zero (the failure is logged).
    """
    local_path = brick_path.split(":")[-1]
    command = "df --output=source,target " + local_path
    out, err, rc = cmd_utils.Command(command).run()
    if rc == 0:
        last_line = out.split("\n")[-1]
        return last_line.split()
    logger.log(
        "error",
        NS.publisher_id,
        {"message": "%s command failed: %s" % (command, err)}
    )
    return None, None
def _getNodeOs(self):
    """Return OS details of this node.

    The dict holds the distribution name and version, the kernel
    release, the output of ``getenforce`` (as a string) and the FQDN.
    """
    out, err, rc = cmd_utils.Command("getenforce").run()
    distribution = platform.linux_distribution()
    return {
        'Name': distribution[0],
        'OSVersion': distribution[1],
        'KernelVersion': platform.release(),
        'SELinuxMode': str(out),
        'FQDN': socket.getfqdn()
    }
def sync_cluster_status(volumes):
    """Derive the cluster health and persist it in ``GlobalDetails``.

    The cluster is unhealthy when any volume state contains 'down' or
    'partial', or when a peer listed by ``gluster pool list`` is not
    'Connected'. For every volume whose stored, non-empty state differs
    from the derived one a ``volume_state`` event is emitted.

    Args:
        volumes: collection of volumes; its length is stored as
            ``vol_count`` and it is passed to ``_derive_volume_states``.
    """
    # Calculate status based on volumes status
    degraded_count = 0
    is_healthy = True
    if len(volumes) > 0:
        volume_states = _derive_volume_states(volumes)
        for vol_id, state in volume_states.iteritems():
            if 'down' in state or 'partial' in state:
                is_healthy = False
            if 'degraded' in state:
                degraded_count += 1

            # Raise the alert if volume state changes
            volume = NS.gluster.objects.Volume(vol_id=vol_id).load()
            if volume.state != "" and state != volume.state:
                # One format string for all three values. The previous
                # '"..." + "..." % (a, b, c)' applied % to the second
                # literal only and raised TypeError.
                msg = "State of volume: %s changed from %s to %s" % (
                    volume.name,
                    volume.state,
                    state
                )
                instance = "volume_%s" % volume.name
                event_utils.emit_event("volume_state", state, msg,
                                       instance)

    # Change status based on node status
    cmd = cmd_utils.Command('gluster pool list', True)
    out, err, rc = cmd.run()
    peer_count = 0
    if not err:
        connected = True
        # Every line after the first (header) line counts as one peer.
        for peer_line in out.split('\n')[1:]:
            peer_count += 1
            node_status_det = peer_line.split('\t')
            if len(node_status_det) > 2 and \
                    node_status_det[2].strip() != 'Connected':
                connected = False
        if not connected:
            is_healthy = False

    # Persist the cluster status
    NS.gluster.objects.GlobalDetails(
        status='healthy' if is_healthy else 'unhealthy',
        peer_count=peer_count,
        vol_count=len(volumes),
        volume_up_degraded=degraded_count
    ).save()
def run(self):
    """Stop gluster profiling for a volume and wait for the stored state.

    Reads the volume id from ``self.parameters['Volume.vol_id']``, runs
    ``gluster volume profile <name> stop`` and then reloads the volume
    object every 5 seconds until its ``profiling_enabled`` is "no".

    The wait is bounded to 24 attempts so that a state which never
    changes fails the atom instead of blocking it forever.

    Returns:
        True once the stored volume reports ``profiling_enabled == "no"``.

    Raises:
        AtomExecutionFailedError: if the stop command exits non-zero or
            the stored state did not change within the allowed attempts.
    """
    max_attempts = 24
    vol_id = self.parameters['Volume.vol_id']
    volume = NS.gluster.objects.Volume(vol_id=vol_id).load()
    command = "gluster volume profile %s stop" % volume.name
    cmd = cmd_utils.Command(command)
    out, err, rc = cmd.run()
    if rc != 0:
        raise AtomExecutionFailedError(
            "Error while disabling profiling "
            "for volume: %s in cluster: %s. Error: %s" %
            (volume.name, NS.tendrl_context.integration_id, err))

    for _attempt in range(max_attempts):
        volume = NS.gluster.objects.Volume(vol_id=vol_id).load()
        if volume.profiling_enabled == "no":
            return True
        time.sleep(5)

    raise AtomExecutionFailedError(
        "Could not disable profiling for volume: %s "
        "under cluster: %s. Timed out" %
        (volume.name, NS.tendrl_context.integration_id))
def _enable_disable_volume_profiling(self):
    """Bring each volume's profiling in line with the cluster setting.

    When ``cluster.enable_volume_profiling`` is "yes", profiling is
    started on volumes whose ``profiling_enabled`` is 'False' or '';
    otherwise it is stopped on volumes whose value is 'True'. The
    volume's ``profiling_enabled`` is updated and saved after each
    command; volumes whose command reported an error are collected and
    published in one warning event.
    """
    cluster = NS.tendrl.objects.Cluster(
        integration_id=NS.tendrl_context.integration_id
    ).load()
    volumes = NS.gluster.objects.Volume().load_all() or []

    failed_vols = []
    for volume in volumes:
        enabling = cluster.enable_volume_profiling == "yes"
        if enabling:
            if volume.profiling_enabled not in ('False', ''):
                continue
            action = "start"
        else:
            if volume.profiling_enabled != 'True':
                continue
            action = "stop"

        out, err, rc = cmd_utils.Command(
            "gluster volume profile %s %s" % (volume.name, action)
        ).run()

        if err or rc != 0:
            # The command failed; still record the state when the error
            # says the volume is already in the requested state.
            if action == "start" and "already started" in err:
                volume.profiling_enabled = "True"
            if action == "stop" and "not started" in err:
                volume.profiling_enabled = "False"
            volume.save()
            failed_vols.append(volume.name)
        else:
            if cluster.enable_volume_profiling == "yes":
                volume.profiling_enabled = "True"
            else:
                volume.profiling_enabled = "False"
            volume.save()

    if failed_vols:
        Event(
            Message(priority="warning",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "%sing profiling failed for volumes: %s" %
                        (action, str(failed_vols))
                    }))
def get_raw_reference():
    """Collect the ``ls -l`` entries of every directory under /dev/disk/.

    Returns:
        dict mapping each sub-directory name of ``/dev/disk/`` to a list
        holding, per listed entry, the text from the 9th whitespace
        separated column onwards (the name column of ``ls -l``). A
        directory whose listing produced error output maps to an empty
        list and the error is logged at debug level.
    """
    base_path = '/dev/disk/'
    raw_reference = {}
    for path in os.listdir(base_path):
        raw_reference[path] = []
        cmd = cmd_utils.Command("ls -l %s" % (base_path + path))
        out, err, rc = cmd.run()
        if err:
            logger.log("debug", NS.publisher_id, {"message": err})
            continue
        # Normalise the output to a utf8 byte string whatever type the
        # command returned.
        out = unicodedata.normalize('NFKD', out).encode('utf8', 'ignore') \
            if isinstance(out, unicode) \
            else unicode(out, errors="ignore").encode('utf8')
        # The first line of the listing is skipped, as before.
        for line in out.split('\n')[1:]:
            # ls pads its columns with a variable number of spaces, so
            # split on whitespace runs instead of single spaces;
            # otherwise the name column shifts for narrow values.
            fields = line.split(None, 8)
            raw_reference[path].append(fields[-1] if fields else '')
    return raw_reference
def run(self):
    """Wait until this node's first cluster sync has completed.

    Polls the node's ``ClusterNodeContext`` every 5 seconds until its
    ``first_sync_done`` is "yes" (case-insensitive).

    Returns:
        True when the first sync is reported done, False (after logging
        an error) when the wait budget is exhausted.
    """
    integration_id = self.parameters['TendrlContext.integration_id']
    out, err, rc = cmd_utils.Command('gluster volume list').run()

    # Default budget is 6 minutes: one count per 5 second sleep
    # (360 / 5).
    wait_count = 72
    if not err:
        # 15 more seconds (3 counts) for each line of the volume list.
        wait_count += len(out.split("\n")) * 3

    for _attempt in range(wait_count):
        time.sleep(5)
        _cnc = NS.tendrl.objects.ClusterNodeContext(
            node_id=NS.node_context.node_id
        ).load()
        if _cnc.first_sync_done is not None and \
                _cnc.first_sync_done.lower() == "yes":
            return True

    # Arguments are in the order of the placeholders: node first, then
    # integration id (they were swapped before).
    logger.log(
        "error",
        NS.publisher_id,
        {"message": "Timing out import job, Cluster data still "
                    "not fully updated (node: %s) "
                    "(integration_id: %s)" %
                    (NS.node_context.node_id, integration_id)},
        job_id=self.parameters['job_id'],
        flow_id=self.parameters['flow_id']
    )
    return False
def get_node_network():
    """Return the node's network interfaces enriched with hwinfo data.

    Every blank-line-separated block of ``hwinfo --network`` is parsed
    into a record; when the record's "Device File" names an interface
    known to ``get_node_interface()``, that interface dict is updated
    with the record and appended to the result.

    return
        [{"ipv4": ["ipv4address", ...],
          "ipv6": ["ipv6address, ..."],
          "netmask": ["subnet", ...],
          "subnet": "subnet",
          "status":"up/down",
          "interface_id": "",
          "sysfs_id": "",
          "device_link": "",
          "interface_type": "",
          "model": "",
          "driver_modules": "",
          "drive": "",
          "hw_address": "",
          "link_detected": ""
          }, ...
         ]
    The list is empty when hwinfo reported an error other than the
    tolerated "vdsmdummy: command not found" (the error is published as
    a debug event).
    """
    rv = []
    network_interfaces = get_node_interface()
    cmd = cmd_utils.Command('hwinfo --network')
    out, err, rc = cmd.run()
    if err and "vdsmdummy: command not found" not in err:
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={"message": err}))
        return rv

    # (substring looked for in the line, record key, strip quotes?).
    # Order matters: "Driver Modules" must be tested before "Driver".
    matchers = (
        ("Unique ID", 'interface_id', False),
        ("SysFS ID", 'sysfs_id', False),
        ("SysFS Device Link", 'device_link', False),
        ("Hardware Class", 'interface_type', False),
        ("Model", 'model', True),
        ("Driver Modules", 'driver_modules', True),
        # NOTE(review): the record template below has a "drive" key
        # while the parsed value is stored under "driver"; both are
        # kept as they were.
        ("Driver", 'driver', True),
        ("Device File", 'interface', False),
        ("HW Address", 'hw_address', False),
        ("Link detected", 'link_detected', False),
    )

    for interface in out.split('\n\n'):
        devlist = {
            "interface_id": "",
            "sysfs_id": "",
            "device_link": "",
            "interface_type": "",
            "model": "",
            "driver_modules": "",
            "drive": "",
            "interface": "",
            "hw_address": "",
            "link_detected": ""
        }
        for line in interface.split('\n'):
            for marker, key, unquote in matchers:
                if marker not in line:
                    continue
                # Split on the FIRST colon only, so values that contain
                # colons (MAC addresses, sysfs PCI paths) are kept
                # whole instead of being cut at their first colon.
                value = line.split(':', 1)[1].lstrip()
                if unquote:
                    value = value.replace('"', "")
                devlist[key] = value
                break
        if devlist["interface"] in network_interfaces:
            interface_name = devlist["interface"]
            network_interfaces[interface_name].update(devlist)
            rv.append(network_interfaces[interface_name])
    return rv
def get_disk_details():
    """Parse ``hwinfo --disk`` into per-disk records.

    Every blank-line-separated block of the output becomes one record.
    A ``disk_id`` is derived for it (virtio by-id name, a
    vendor/device/parent/hardware id combination for VMware disks,
    vendor_device_serial, or the device name as a last resort).

    Returns:
        (disks, disks_map, err) where ``disks`` maps disk_id -> record,
        ``disks_map`` maps the record's hardware id -> disk_id and
        ``err`` is the error output of the hwinfo command. Both dicts
        are empty when ``err`` is non-empty.
    """
    # hwinfo label -> record key; stored with leading blanks removed
    plain_fields = {
        "Unique ID": "hardware_id",
        "Parent ID": "parent_id",
        "SysFS ID": "sysfs_id",
        "SysFS BusID": "sysfs_busid",
        "SysFS Device Link": "sysfs_device_link",
        "Hardware Class": "hardware_class",
        "Device Files": "device_files",
        "Device Number": "device_number",
        "BIOS id": "bios_id",
        "Geometry (Logical)": "geo_logical",
        "Geometry (BIOS EDD)": "geo_bios_edd",
        "Size (BIOS EDD)": "size_bios_edd",
        "Geometry (BIOS Legacy)": "geo_bios_legacy",
        "Config Status": "config_status",
    }
    # stored with leading blanks and double quotes removed
    unquoted_fields = {
        "Model": "model",
        "Revision": "rmversion",
        "Driver": "driver",
        "Driver Modules": "driver_modules",
    }
    # stored with ALL spaces and double quotes removed
    compact_fields = {
        "Vendor": "vendor",
        "Device": "device",
        "Serial ID": "serial_no",
    }

    disks = {}
    disks_map = {}
    cmd = cmd_utils.Command('hwinfo --disk')
    out, err, rc = cmd.run()
    if err:
        return disks, disks_map, err

    out = unicodedata.normalize('NFKD', out).encode('utf8', 'ignore') \
        if isinstance(out, unicode) \
        else unicode(out, errors="ignore").encode('utf8')
    for all_disks in out.split('\n\n'):
        devlist = {
            "disk_id": "",
            "hardware_id": "",
            "parent_id": "",
            "disk_name": "",
            "sysfs_id": "",
            "sysfs_busid": "",
            "sysfs_device_link": "",
            "hardware_class": "",
            "model": "",
            "vendor": "",
            "device": "",
            "rmversion": "",
            "serial_no": "",
            "driver_modules": "",
            "driver": "",
            "device_files": "",
            "device_number": "",
            "bios_id": "",
            "geo_bios_edd": "",
            "geo_logical": "",
            "size": "",
            "size_bios_edd": "",
            "geo_bios_legacy": "",
            "config_status": "",
            "partitions": {}
        }
        for disk in all_disks.split('\n'):
            # Split on the FIRST colon only: bus ids ("0:0:0:0"), sysfs
            # links and by-path device files contain colons and were
            # previously truncated at their first inner colon.
            label, _sep, value = disk.partition(':')
            label = label.strip()
            if label in plain_fields:
                devlist[plain_fields[label]] = value.lstrip()
            elif label in unquoted_fields:
                devlist[unquoted_fields[label]] = \
                    value.lstrip().replace('"', "")
            elif label in compact_fields:
                devlist[compact_fields[label]] = \
                    value.replace(" ", "").replace('"', "")
            elif label == "Device File":
                # Only the first space-separated token is the name.
                devlist["disk_name"] = value.lstrip().split(" ")[0]
            elif label == "Capacity":
                # The size is the first token inside the parentheses.
                devlist["size"] = disk.split('(')[1].split()[0]

        if ("virtio" in devlist["driver"] and
                "by-id/virtio" in devlist['device_files']):
            # split from:
            # /dev/vdc, /dev/disk/by-id/virtio-0200f64e-5892-40ee-8,
            # /dev/disk/by-path/virtio-pci-0000:00:08.0
            for entry in devlist['device_files'].split(','):
                if "by-id/virtio" in entry:
                    devlist['disk_id'] = entry.split('/')[-1]
                    break
        elif "VMware" in devlist["vendor"]:
            devlist["disk_id"] = \
                "{vendor}_{device}_{parent_id}_{hardware_id}".format(
                    **devlist)
        elif (devlist["vendor"] != "" and
              devlist["device"] != "" and
              devlist["serial_no"] != ""):
            devlist["disk_id"] = (devlist["vendor"] + "_" +
                                  devlist["device"] + "_" +
                                  devlist["serial_no"])
        else:
            devlist['disk_id'] = devlist['disk_name']

        disk_id = devlist["disk_id"]
        if disk_id in disks:
            # Multipath exposes a single device through several I/O
            # paths, so hwinfo and lsblk report the same device under
            # different names. To avoid duplicate entries, when several
            # devices share a disk_id the one whose name sorts lowest
            # is kept; this also keeps the choice stable across syncs.
            if devlist["disk_name"] < disks[disk_id]['disk_name']:
                disks[disk_id] = devlist
                disks_map[devlist['hardware_id']] = disk_id
        else:
            disks[disk_id] = devlist
            disks_map[devlist['hardware_id']] = disk_id
    return disks, disks_map, err
def get_node_block_devices(disks_map):
    """Classify the node's block devices as used or free via ``lsblk``.

    Args:
        disks_map: dict keyed by device name whose values are dicts
            with at least a 'disk_id' entry; the 'ssd' entry of a
            matching disk is updated in place.

    Returns:
        dict with three lists: 'all' (device dicts), 'used' and 'free'
        (device names). Partitions are always used; a disk is used when
        its name appears as the parent of another listed device. The
        lists stay empty when lsblk reported an error (logged at debug
        level).
    """
    columns = 'NAME,KNAME,PKNAME,MAJ:MIN,FSTYPE,MOUNTPOINT,LABEL,' \
              'UUID,RA,RO,RM,SIZE,STATE,OWNER,GROUP,MODE,ALIGNMENT,' \
              'MIN-IO,OPT-IO,PHY-SEC,LOG-SEC,ROTA,SCHED,RQ-SIZE,' \
              'DISC-ALN,DISC-GRAN,DISC-MAX,DISC-ZERO,TYPE'
    keys = columns.split(',')
    block_devices = dict(all=list(), free=list(), used=list())

    lsblk = (
        "lsblk --all --bytes --noheadings --output='%s' --path --raw" %
        columns)
    out, err, rc = cmd_utils.Command(lsblk).run()
    if err:
        logger.log("debug", NS.publisher_id, {"message": err})
        return block_devices

    out = unicodedata.normalize('NFKD', out).encode('utf8', 'ignore') \
        if isinstance(out, unicode) \
        else unicode(out, errors="ignore").encode('utf8')
    devlist = [dict(zip(keys, line.split(' '))) for line in out.splitlines()]

    # (device key, lsblk column) pairs copied verbatim, split around the
    # two boolean fields so the columns are read in the original order.
    leading = (
        ('device_name', 'NAME'),
        ('device_kernel_name', 'KNAME'),
        ('parent_name', 'PKNAME'),
        ('major_to_minor_no', 'MAJ:MIN'),
        ('fstype', 'FSTYPE'),
        ('mount_point', 'MOUNTPOINT'),
        ('label', 'LABEL'),
        ('fsuuid', 'UUID'),
        ('read_ahead', 'RA'),
    )
    trailing = (
        ('size', 'SIZE'),
        ('state', 'STATE'),
        ('owner', 'OWNER'),
        ('group', 'GROUP'),
        ('mode', 'MODE'),
        ('alignment', 'ALIGNMENT'),
        ('min_io_size', 'MIN-IO'),
        ('optimal_io_size', 'OPT-IO'),
        ('phy_sector_size', 'PHY-SEC'),
        ('log_sector_size', 'LOG-SEC'),
        ('device_type', 'TYPE'),
        ('scheduler_name', 'SCHED'),
        ('req_queue_size', 'RQ-SIZE'),
        ('discard_align_offset', 'DISC-ALN'),
        ('discard_granularity', 'DISC-GRAN'),
        ('discard_max_bytes', 'DISC-MAX'),
        ('discard_zeros_data', 'DISC-ZERO'),
        ('rotational', 'ROTA'),
    )

    all_parents = []
    parent_ids = []
    multipath = {}
    for dev_info in devlist:
        device = dict()
        for dest, column in leading:
            device[dest] = dev_info[column]
        device['read_only'] = dev_info['RO'] != '0'
        device['removable_device'] = dev_info['RM'] != '0'
        for dest, column in trailing:
            device[dest] = dev_info[column]

        dev_type = dev_info['TYPE']
        if dev_type == 'disk':
            device['ssd'] = is_ssd(dev_info['ROTA'])
        else:
            device['ssd'] = False

        if dev_type == 'part':
            device['used'] = True
            # If the partition sits under multipath, the parent of the
            # multipath device is assigned instead.
            if dev_info['PKNAME'] in multipath:
                dev_info['PKNAME'] = multipath[dev_info['PKNAME']]
            if dev_info['PKNAME'] in disks_map:
                device['disk_id'] = disks_map[
                    dev_info['PKNAME']]['disk_id']
                block_devices['all'].append(device)
                block_devices['used'].append(device['device_name'])

        if dev_type == 'disk':
            if dev_info['NAME'] in disks_map:
                device['disk_id'] = disks_map[dev_info['NAME']]['disk_id']
                disks_map[dev_info['NAME']]['ssd'] = device['ssd']
                all_parents.append(device)

        if dev_type == 'mpath':
            # Remember which device backs this multipath device.
            multipath[device['device_kernel_name']] = dev_info['PKNAME']
        else:
            if dev_info['PKNAME'] in multipath:
                dev_info['PKNAME'] = multipath[dev_info['PKNAME']]
            parent_ids.append(dev_info['PKNAME'])

    for parent in all_parents:
        in_use = parent['device_name'] in parent_ids
        parent['used'] = in_use
        bucket = 'used' if in_use else 'free'
        block_devices[bucket].append(parent['device_name'])
        block_devices['all'].append(parent)
    return block_devices
def import_gluster(parameters):
    """Install, configure and start tendrl-gluster-integration on this node.

    Steps, in order: install the package through the ansible module that
    matches ``package_source_type`` (``pip`` or ``rpm``), write the logging
    and service configuration files under
    ``/etc/tendrl/gluster-integration/``, then enable (rpm only) and start
    the service.

    :param parameters: job parameters; ``job_id`` and ``flow_id`` are read
        and attached to every log message.
    :returns: True on success. False when the package source type is
        neither ``pip`` nor ``rpm``, when the installation fails, or when
        the service cannot be enabled or started.
    """
    logging_file_name = "gluster-integration_logging.yaml"
    logging_config_file_path = "/etc/tendrl/gluster-integration/"
    package_source_type = NS.config.data['package_source_type']

    def _log(level, message):
        # All messages of this flow carry the job/flow ids.
        logger.log(
            level,
            NS.publisher_id,
            {"message": message},
            job_id=parameters['job_id'],
            flow_id=parameters['flow_id'],
        )

    attributes = {}
    if package_source_type == 'pip':
        _cmd = "nohup tendrl-gluster-integration &"
        name = "https://github.com/Tendrl/gluster-integration/archive/master" \
               ".tar.gz"
        attributes["name"] = name
        attributes["editable"] = "false"
        ansible_module_path = "packaging/language/pip.py"
    elif package_source_type == 'rpm':
        name = "tendrl-gluster-integration"
        _cmd = "systemctl restart %s" % name
        ansible_module_path = "packaging/os/yum.py"
        attributes["name"] = name
    else:
        return False

    _log(
        "info",
        "Installing tendrl-gluster-integration on "
        "Node %s" % NS.node_context.fqdn
    )

    try:
        runner = ansible_module_runner.AnsibleRunner(
            ansible_module_path, **attributes)
    except ansible_module_runner.AnsibleModuleNotFound:
        # Backward compat ansible<=2.2
        runner = ansible_module_runner.AnsibleRunner(
            "core/" + ansible_module_path, **attributes)

    try:
        out, err = runner.run()
        if out['rc'] != 0:
            _log(
                "error",
                "Could not install tendrl-gluster-integration on Node %s. "
                "Error: %s" % (NS.node_context.fqdn, out['msg'])
            )
            return False
    except ansible_module_runner.AnsibleExecutableGenerationFailed:
        _log(
            "error",
            "Error: Could not install "
            "tendrl-gluster-integration on Node %s" % NS.node_context.fqdn
        )
        return False

    _log(
        "info",
        "Generating configuration for "
        "tendrl-gluster-integration on Node %s" % NS.node_context.fqdn
    )

    # Drop the packaged logging configuration next to the service config.
    with open(logging_config_file_path + logging_file_name, 'w+') as f:
        f.write(pkg_resources.resource_string(__name__, logging_file_name))

    gluster_integration_tag = NS.compiled_definitions.get_parsed_defs(
    )['namespace.tendrl']['tags']['tendrl-gluster-integration']
    config_data = {
        "etcd_port": int(NS.config.data['etcd_port']),
        "etcd_connection": str(NS.config.data['etcd_connection']),
        "log_cfg_path": (logging_config_file_path + logging_file_name),
        "log_level": "DEBUG",
        "logging_socket_path": "/var/run/tendrl/message.sock",
        "sync_interval": int(NS.config.data['sync_interval']),
        "tags": [gluster_integration_tag]
    }

    # TLS settings are only propagated when all three files are configured.
    etcd_ca_cert_file = NS.config.data.get("etcd_ca_cert_file")
    etcd_cert_file = NS.config.data.get("etcd_cert_file")
    etcd_key_file = NS.config.data.get("etcd_key_file")
    if etcd_ca_cert_file and etcd_cert_file and etcd_key_file:
        config_data.update({
            "etcd_ca_cert_file": etcd_ca_cert_file,
            "etcd_cert_file": etcd_cert_file,
            "etcd_key_file": etcd_key_file
        })

    _gluster_integration_conf_file_path = \
        "/etc/tendrl/gluster-integration/gluster-integration.conf.yaml"
    with open(_gluster_integration_conf_file_path, 'w') as outfile:
        yaml.dump(config_data, outfile, default_flow_style=False)

    _log(
        "info",
        "Running tendrl-gluster-integration on Node "
        "%s" % NS.node_context.fqdn
    )
    os.chmod(_gluster_integration_conf_file_path, 0o640)

    # Command.run() yields (out, err, rc). The exit status is the failure
    # signal: a command may write to stdout/stderr and still succeed.
    if package_source_type == 'rpm':
        out, err, rc = cmd_utils.Command(
            "systemctl enable tendrl-gluster-integration").run()
        if rc != 0:
            _log(
                "error",
                "Could not enable gluster-integration"
                " service. Error: %s" % err
            )
            return False

    out, err, rc = cmd_utils.Command(_cmd).run()
    if rc != 0:
        _log(
            "error",
            "Could not start gluster-integration"
            " service. Error: %s" % err
        )
        return False

    return True
def run(self):
    """Check that every gluster peer is among the detected tendrl nodes.

    Resolves each hostname from ``gluster pool list`` (except
    ``localhost``) and each detected node's fqdn to an address, then
    verifies that no peer address is left without a matching node.

    :returns: True when all peers are matched; False when some peers are
        not yet detected or when the comparison itself fails (missing
        etcd key, malformed data, unresolvable hostname, gluster command
        error). Failures are logged, not raised.
    """
    gluster_server_peer_check = True
    try:
        integration_id = self.parameters['TendrlContext.integration_id']
        # get all detected node ids
        _node_ids_str = etcd_utils.read(
            "/indexes/tags/tendrl/integration/%s" % integration_id
        ).value
        _node_ids = json.loads(_node_ids_str)
        # exclude current node_id from the list
        _node_ids.remove(NS.node_context.node_id)

        # get gluster peer list
        out, err, _ = cmd_utils.Command('gluster pool list').run()
        if err:
            raise GlusterServerPeerError(err)

        # resolved peer address -> hostname as reported by gluster, so
        # that the log message can show the actual peer hostname
        gluster_fqdn = {}
        # first line of the output is the column header
        for line in out.split('\n')[1:]:
            if not line.strip():
                continue
            peer_hostname = line.split()[-2]
            if peer_hostname != "localhost":
                gluster_fqdn[socket.gethostbyname(peer_hostname)] = \
                    peer_hostname

        # Matching is done on resolved addresses so that hostname/ip
        # differences between gluster and tendrl do not matter
        for _node_id in _node_ids:
            node_obj = NS.tendrl.objects.NodeContext(
                node_id=_node_id
            ).load()
            gluster_fqdn.pop(socket.gethostbyname(node_obj.fqdn), None)

        # anything remaining means not all nodes are detected
        if gluster_fqdn:
            logger.log(
                "error",
                NS.publisher_id,
                {
                    "message": "Gluster servers %s are not yet detected, "
                    "Make sure tendrl-ansible is executed for the these "
                    "nodes" % list(gluster_fqdn.values())
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            gluster_server_peer_check = False
    except (etcd.EtcdKeyNotFound,
            ValueError,
            TypeError,
            AttributeError,
            IndexError,
            KeyError,
            socket.gaierror,
            GlusterServerPeerError) as ex:
        # socket.gaierror: a hostname that cannot be resolved is a failed
        # comparison like any other, not a reason to crash the atom.
        logger.log(
            "error",
            NS.publisher_id,
            {
                "message": "Unable to compare detected nodes with "
                "gluster peer list. Error: %s" % ex
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters['flow_id']
        )
        gluster_server_peer_check = False
    return gluster_server_peer_check
def run(self):
    """Enable or disable profiling on all volumes of the cluster.

    Locks the cluster, runs ``gluster volume profile <name> <action>`` for
    every volume, records the per-volume result, then unlocks the cluster
    and stores the resulting profiling state.

    :returns: True once the cluster object has been updated; failures of
        individual volumes are logged and do not fail the flow.
    :raises FlowExecutionFailedError: when
        ``Cluster.volume_profiling_flag`` is not a key of
        ``VOL_PROFILE_ACTIONS``.
    """
    action = self.parameters["Cluster.volume_profiling_flag"]
    if action not in VOL_PROFILE_ACTIONS:
        raise FlowExecutionFailedError(
            "Invalid value of Cluster.volume_profiling_flag "
            "(%s) while enable/disable volume profiling for "
            "cluster (%s). Valid values are enable/disable" % (
                action,
                NS.tendrl_context.integration_id
            )
        )

    # Mark the cluster as locked by this job while profiling is changed.
    _cluster = NS.tendrl.objects.Cluster(
        integration_id=NS.tendrl_context.integration_id
    ).load()
    _cluster.locked_by = {
        'node_id': NS.node_context.node_id,
        'tags': NS.node_context.tags,
        'type': NS.type,
        'job_name': self.__class__.__name__,
        'job_id': self.job_id
    }
    _cluster.status = "set_volume_profiling"
    _cluster.current_job = {
        'job_id': self.job_id,
        'job_name': self.__class__.__name__,
        'status': 'in_progress'
    }
    _cluster.save()

    volumes = NS.tendrl.objects.GlusterVolume(
        NS.tendrl_context.integration_id
    ).load_all() or []
    failed_vols = []
    for volume in volumes:
        out, err, rc = cmd_utils.Command(
            "gluster volume profile %s %s" %
            (volume.name, VOL_PROFILE_ACTIONS[action])
        ).run()
        if err != "" or rc != 0:
            logger.log(
                "info",
                NS.publisher_id,
                {
                    "message": "%s profiling failed for volume: %s."
                    " Error: %s" % (action, volume.name, err)
                },
                job_id=self.parameters["job_id"],
                flow_id=self.parameters["flow_id"]
            )
            failed_vols.append(volume.name)
        else:
            volume.profiling_enabled = "yes" if action == "enable" else "no"
            volume.save()

    if failed_vols:
        logger.log(
            "info",
            NS.publisher_id,
            {
                "message": "%s profiling failed for "
                "volumes: %s" % (action, str(failed_vols))
            },
            job_id=self.parameters['job_id'],
            flow_id=self.parameters["flow_id"]
        )

    # Reload before unlocking so changes made meanwhile are not overwritten.
    _cluster = NS.tendrl.objects.Cluster(
        integration_id=NS.tendrl_context.integration_id
    ).load()
    _cluster.status = ""
    _cluster.locked_by = {}
    _cluster.current_job = {
        'status': "finished",
        'job_name': self.__class__.__name__,
        'job_id': self.job_id
    }
    # NOTE(review): the state becomes "enabled"/"disabled" even when some
    # volumes failed above; confirm whether a later sync is expected to
    # correct it.
    _cluster.volume_profiling_state = "%sd" % action
    _cluster.save()
    return True