def add_cache(args):
    if not configuration().get_node_info().is_storage:
        print("-1")
        return

    job_manager = JobManager()
    params = '-disk_name {} -partitions {}'.format(args.disk_name, args.partitions)

    # Getting all running jobs :
    for j in job_manager.get_running_job_list():
        if j.type == JobType.DELETEOSD or j.type == JobType.ADDDISK or \
                j.type == JobType.ADDJOURNAL or j.type == JobType.DELETEJOURNAL or \
                j.type == JobType.ADDCACHE or j.type == JobType.DELETECACHE:
            logger.info("Cannot start add cache job for disk {}. There are running jobs."
                        .format(args.disk_name))
            print("-1")
            return

    print(job_manager.add_job(JobType.ADDCACHE, params))
    logger.info("Start add cache job for disk {}.".format(args.disk_name))
    sys.exit()
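# --- Illustrative sketch only (not part of the PetaSAN sources): how a CLI handler
# --- like add_cache() is typically wired to argparse. The subcommand name 'add-cache'
# --- and the set_defaults() dispatch are assumptions for demonstration.
#
# import argparse
#
# parser = argparse.ArgumentParser()
# subparsers = parser.add_subparsers()
# cache_parser = subparsers.add_parser('add-cache')        # hypothetical subcommand name
# cache_parser.add_argument('-disk_name', required=True)   # populates args.disk_name above
# cache_parser.add_argument('-partitions', required=True)  # populates args.partitions above
# cache_parser.set_defaults(func=add_cache)
#
# args = parser.parse_args()
# args.func(args)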
def __get_disks(self):
    all_disks_dict = {}
    osd_disks = []
    journal_disks = []
    disk_list = []
    ceph_disk_list = ceph_disk_lib.get_disk_list()
    ph_disk_list = disk_util.get_disk_list()
    osd_dict = ceph_osd.ceph_osd_tree(configuration().get_node_info().name)
    try:
        if ceph_disk_list and len(ceph_disk_list) > 0:
            for disk in ceph_disk_list:
                for ph_disk in ph_disk_list:
                    if ph_disk.name == disk.name:
                        ph_disk.usage = disk.usage
                        ph_disk.osd_id = disk.osd_id
                        ph_disk.osd_uuid = disk.osd_uuid
                        disk_list.append(ph_disk)
                        break

        for node_disk in disk_list:
            if node_disk.usage == DiskUsage.osd:
                status = None
                if osd_dict:
                    status = osd_dict.get(int(node_disk.osd_id), None)
                if status != None:
                    node_disk.status = status
                if status == 1:
                    osd_disks.append(node_disk.name)
            elif node_disk.usage == DiskUsage.journal:
                journal_disks.append(node_disk.name)

        all_disks_dict = {"OSDs": osd_disks, "Journals": journal_disks}
    except Exception as ex:
        logger.error("Cannot get node disks.")
        logger.exception(ex.message)
    return all_disks_dict
def add__node_to_hosts_file(self, remote=True):
    path = "/etc/hosts"
    cluster_conf = configuration()
    current_node = cluster_conf.get_node_info()
    try:
        with open(path, mode='a') as f:
            f.write("{ip} {host}\n".format(ip=current_node.management_ip,
                                           host=current_node.name))
            if remote:
                f.write("127.0.0.1 localhost\n")
                for node in cluster_conf.get_remote_nodes_config(current_node.name):
                    f.write("{ip} {host}\n".format(ip=node.management_ip,
                                                   host=node.name))
        if FileSyncManager().commit_file(path):
            return True
    except Exception as e:
        logger.error("Could not write hosts file.")
    return False
def __test_mons():
    sleeps = [15, 15, 10, 10, 5, 5]
    tries = 5
    mon_in_quorum = []
    mon_members = []
    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()

    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        mon_members.append(node_info.name)

    for host in mon_members:
        while tries:
            status = mon_status_check()
            has_reached_quorum = host in status.get('quorum_names', '')
            if not has_reached_quorum:
                tries -= 1
                sleep_seconds = sleeps.pop()
                logger.warning('Waiting %s seconds before retrying', sleep_seconds)
                time.sleep(sleep_seconds)
            else:
                mon_in_quorum.append(host)
                break

    if mon_in_quorum == mon_members:
        logger.info("Ceph monitors are ready.")
        return True
    else:
        logger.info("Ceph monitors are not ready.")
        return False
def __get_ifaces(self):
    config = configuration()
    bonds = config.get_cluster_bonds()
    eths = []
    bond_names = []
    try:
        for bond in bonds:
            eths.extend(bond.interfaces.split(','))
            bond_names.append(bond.name)
        eths.extend(bond_names)

        cluster_info = config.get_cluster_info()
        if not cluster_info.backend_1_eth_name in bond_names and not cluster_info.backend_1_eth_name in eths:
            eths.append(cluster_info.backend_1_eth_name)
        if not cluster_info.backend_2_eth_name in bond_names and not cluster_info.backend_2_eth_name in eths:
            eths.append(cluster_info.backend_2_eth_name)
        if not cluster_info.management_eth_name in bond_names and not cluster_info.management_eth_name in eths:
            eths.append(cluster_info.management_eth_name)
    except Exception as ex:
        logger.error("Cannot get node ifaces.")
        logger.exception(ex.message)
    return eths
def update_auth_pools(self, user_name, pool_list):
    config = configuration()
    cluster_name = config.get_cluster_name()
    pool_string = ""
    if len(pool_list) > 0:
        for pool in pool_list:
            pool_string += "\'profile rbd pool=" + pool + "\',"
        if pool_string[-1] == ",":
            pool_string = pool_string[:-1]
    else:
        pool_string = "\'profile rbd\'"

    cmd = "ceph auth caps client.{} mgr 'allow r' mon 'profile rbd' osd {} --cluster {}".format(
        user_name, pool_string, cluster_name)
    ret, stdout, stderr = exec_command_ex(cmd)

    if ret != 0:
        if stderr:
            logger.error('failed to run cmd ' + cmd)
            return False
        return False
    return True
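# Example (illustrative, using sample names): for user_name='backup' and
# pool_list=['rbd', 'images'] on a cluster named 'ceph', the command assembled
# above becomes:
#   ceph auth caps client.backup mgr 'allow r' mon 'profile rbd' \
#       osd 'profile rbd pool=rbd','profile rbd pool=images' --cluster ceph
# With an empty pool_list the osd capability falls back to the plain 'profile rbd'.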
def readImageMetaData(ioctx, image, pool):
    ret = None
    # Get which ceph user is using this function & get his keyring file path
    ceph_auth = CephAuthenticator()
    config = configuration()
    cluster_name = config.get_cluster_name()
    try:
        cmd = "rbd info " + pool + "/" + str(image) + " " + \
              ceph_auth.get_authentication_string() + " --cluster " + cluster_name + \
              " | grep rbd_data"
        ret, stdout, stderr = exec_command_ex(cmd)

        if ret != 0:
            if stderr:
                logger.error("Cannot get image meta object from rbd header.")
            return None

        rbd_data = stdout.rstrip().strip()
        dot_indx = rbd_data.rfind(".")
        image_id = rbd_data[(dot_indx + 1):]
        rbd_header_object = "rbd_header." + image_id
        try:
            ret = ioctx.get_xattr(rbd_header_object, meta_key)
        except:
            ret = ioctx.get_xattr(rbd_header_object[:-1], meta_key)
    except:
        return None
    return ret
'''
from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.common.cmd import *
from PetaSAN.core.entity.cluster import NodeInfo
import json
import argparse
import subprocess
from PetaSAN.core.common.log import logger
from PetaSAN.core.common.cmd import exec_command_ex, call_cmd

MAX_OPEN_FILES = 102400

parser = argparse.ArgumentParser(description='This is a script that will start up the configured consul client.')

join = ''
for node in configuration().get_remote_nodes_config(""):
    join = join + " -retry-join {} ".format(node.backend_1_ip)

logger.info("consul start up string {}".format(join))

str_start_command = "consul agent -config-dir /opt/petasan/config/etc/consul.d/client "
str_start_command = str(str_start_command) + join + ' >/dev/null 2>&1 &'
subprocess.Popen(str_start_command, shell=True)

# Increase max open files for Consul process :
# ============================================
pid_cmd = "ps aux | grep consul | grep agent"
ret, stdout, stderr = exec_command_ex(pid_cmd)
line_1 = stdout.splitlines()[0]
#!/usr/bin/python
'''
 Copyright (C) 2019 Maged Mokhtar <mmokhtar <at> petasan.org>
 Copyright (C) 2019 PetaSAN www.petasan.org

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU Affero General Public License
 as published by the Free Software Foundation

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.
'''

from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.config.api import ConfigAPI

cluster_info = configuration().get_cluster_info()
cluster_info.name = "ceph"

config = ConfigAPI()
with open(config.get_cluster_info_file_path(), 'w', ) as f:
    f.write(cluster_info.write_json())
def add_user(self, user_name, auth_pools):
    backup_nodes_list = []
    nodes_list = ManageNode().get_node_list()
    for node_info in nodes_list:
        if node_info.is_backup:
            backup_nodes_list.append(node_info.name)

    user_info = ConsulAPI().get_replication_user(user_name)
    if user_info and len(user_info.user_name) > 0:
        raise ReplicationException(
            ReplicationException.SYSTEM_USER_EXIST,
            'ThisSystemUserAlreadyExistsInNodes:{}'.format(backup_nodes_list))

    user = Users()
    ceph_usr_stat = user.is_ceph_user_exist(user_name)
    if ceph_usr_stat:
        raise ReplicationException(ReplicationException.CEPH_USER_EXIST,
                                   'ThisCephUserAlreadyExists')

    cluster_name = configuration().get_cluster_name()
    ceph_keyring_path = '/etc/ceph/{}.client.{}.keyring'.format(cluster_name, user_name)

    replication_user = ReplicationUser()
    replication_user.user_name = user_name
    replication_user.auth_pools = auth_pools

    rsa_encrypt = RSAEncryption()
    rsa_pub_key = rsa_encrypt.get_key(rsa_encrypt.pub_key_path)

    user.add_ceph_user(user_name, auth_pools)
    ceph_keyring_value = self.get_file_content(ceph_keyring_path)
    enc_ceph_keyring = rsa_encrypt.encrypt_public(ceph_keyring_value, rsa_pub_key)
    replication_user.ceph_keyring = enc_ceph_keyring

    pub_file = ConfigAPI().get_replication_user_pubkey_file_path()
    prv_file = ConfigAPI().get_replication_user_prvkey_file_path()
    rep_path = ConfigAPI().get_replication_tmp_file_path()
    if not os.path.exists(rep_path):
        os.mkdir(rep_path)

    user.generate_tmp_ssh_keys(pub_file, prv_file)
    pub_key_value = self.get_file_content(pub_file)
    replication_user.ssh_pub_key = pub_key_value
    prv_key_value = self.get_file_content(prv_file)
    enc_prv_key = rsa_encrypt.encrypt_public(prv_key_value, rsa_pub_key)
    replication_user.ssh_prv_key = enc_prv_key

    mng_file = ManageTmpFile()
    mng_file.delete_tmp_file(pub_file)
    mng_file.delete_tmp_file(prv_file)

    consul = ConsulAPI()
    consul.update_replication_user(replication_user)

    for node in backup_nodes_list:
        stat = self.sync_users(node)
        if not stat:
            logger.error("error sync users on the node {}".format(node))
 as published by the Free Software Foundation

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.
'''

import os
import sys

from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.common.log import logger
from PetaSAN.core.config.api import ConfigAPI
from PetaSAN.core.common.cmd import call_cmd

cluster_name = configuration().get_cluster_name()
ceph_mon_keyring = ConfigAPI().get_ceph_mon_keyring(cluster_name)
ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)

try:
    cluster_conf = configuration()
    current_node_info = cluster_conf.get_node_info()
    current_node_name = current_node_info.name
    current_cluster_info = cluster_conf.get_cluster_info()
    config_api = ConfigAPI()

    os.makedirs("/var/lib/ceph/mon/{}-{}".format(cluster_name, current_node_name))
    os.makedirs("/tmp/{}".format(current_node_name))
def add_job():
    """
    DOCSTRING : this function is called when opening the Add new replication job form page.
    Args : None
    Returns : render to the template page : 'admin/replication/add_replication_job.html'
    """
    if request.method == 'GET' or request.method == 'POST':
        try:
            backup_nodes = []
            destination_clusters_list = []
            manage_node = ManageNode()
            nodes = manage_node.get_node_list()
            cluster_name = configuration().get_cluster_name(custom_name=True)
            manage_destination_cluster = ManageDestinationCluster()
            destination_clusters_dict = manage_destination_cluster.get_replication_dest_clusters()

            for dest_cluster in destination_clusters_dict:
                destination_clusters_list.append(dest_cluster)
            destination_clusters_list.sort()

            for node in nodes:
                if node.is_backup:
                    backup_nodes.append(node)
            backup_nodes.sort()

            if list_err in session:
                result = session["err"]
                session.pop("err")
                return render_template('/admin/replication/add_replication_job.html',
                                       backup_nodes=backup_nodes,
                                       cluster_name=cluster_name,
                                       destination_clusters_list=destination_clusters_list,
                                       err=result)
            elif list_success in session:
                result = session["success"]
                session.pop("success")
                return render_template('/admin/replication/add_replication_job.html',
                                       backup_nodes=backup_nodes,
                                       cluster_name=cluster_name,
                                       destination_clusters_list=destination_clusters_list,
                                       success=result)
            elif list_warning in session:
                result = session["warning"]
                session.pop("warning")
                return render_template('/admin/replication/add_replication_job.html',
                                       backup_nodes=backup_nodes,
                                       cluster_name=cluster_name,
                                       destination_clusters_list=destination_clusters_list,
                                       warning=result)
            else:
                return render_template('/admin/replication/add_replication_job.html',
                                       backup_nodes=backup_nodes,
                                       cluster_name=cluster_name,
                                       destination_clusters_list=destination_clusters_list)
        except CephException as e:
            if e.id == CephException.CONNECTION_TIMEOUT:
                session['err'] = "ui_admin_ceph_time_out"
            elif e.id == CephException.GENERAL_EXCEPTION:
                session['err'] = "ui_admin_ceph_general_exception"
            logger.error(e)
            return redirect(url_for('replication_controller.job_list'))
        except Exception as e:
            session['err'] = "ui_admin_add_job_error"
            logger.error(e)
            return redirect(url_for('replication_controller.job_list'))
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU Affero General Public License
 as published by the Free Software Foundation

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.
'''

from PetaSAN.core.cluster.configuration import configuration
import os
from PetaSAN.core.common.log import logger
from PetaSAN.core.config.api import ConfigAPI

cluster_name = configuration().get_cluster_name()
node_info = configuration().get_node_info()
node_name = node_info.name
nodes = configuration().get_management_nodes_config()
collected_path = ConfigAPI().get_collect_state_dir() + node_name

if not os.path.exists("{}".format(ConfigAPI().get_collect_state_dir())):
    os.system("mkdir {}".format(ConfigAPI().get_collect_state_dir()))

if os.path.exists(collected_path):
    os.system("rm -rf {}".format(collected_path))

if os.path.exists("{}.tar".format(collected_path)):
    os.system("rm -rf {}.tar".format(collected_path))

os.mkdir("{}".format(collected_path))

try:
    for node in nodes:
def __acquire_path(self, path, consul_kv): if self.__ignored_acquire_paths.get(path): logger.info("Ignore forced path {}".format(path)) return logger.debug("Start acquire path {} by node session {}.".format( path, self.__session)) consul_api = ConsulAPI() ceph_api = CephAPI() lio_api = LioAPI() network_api = NetworkAPI() config = configuration() try: disk_id, path_index = str(path).split("/") pool = self._get_pool_by_disk(disk_id) if not pool: logger.error('Could not find pool for disk ' + disk_id) return image_name = self.__image_name_prefix + disk_id logger.debug( "Start read image meta for acquire path {}.".format(path)) all_image_meta = ceph_api.read_image_metadata(image_name, pool) petasan_meta = all_image_meta.get( self.__app_conf.get_image_meta_key()) disk_meta = DiskMeta() disk_meta.load_json(petasan_meta) logger.debug( "End read image meta for acquire path {}.".format(path)) logger.debug("Try to acquire path {}.".format(path)) node_name = config.get_node_name() result = consul_api.lock_disk_path( self.__app_conf.get_consul_disks_path() + path, self.__session, node_name, str(consul_kv.CreateIndex)) if not result: logger.info("Could not lock path {} with session {}.".format( path, self.__session)) elif result: if consul_kv.Value != None and len(str( consul_kv.Value)) > 0 and node_name != str( consul_kv.Value): logger.info("The path {} was locked by {}.".format( path, str(consul_kv.Value))) logger.debug("Node {} will kill node {}.".format( config.get_node_name(), str(consul_kv.Value))) self.__fencing(str(consul_kv.Value)) # we locked it if disk_meta.paths: # if lio has the image name in its backstore already, do not perform rbd mapping if image_name not in self.__backstore: status = ceph_api.map_iamge(image_name, pool) else: status = Status.done if Status.done == status: # Get path info from metadata path_obj = disk_meta.get_paths()[int(path_index) - 1] # add path ips to our network interfaces network_api.add_ip(path_obj.ip, path_obj.subnet_mask, path_obj.eth, path_obj.vlan_id) #update neighbors arp table network_api.update_neighbors_arp( path_obj.ip, path_obj.eth) # add new target in lio if not there already if not lio_api.is_backstore_image_found(image_name): # Give ceph map image complete it job sleep(3) # Add rbd backstores and target status = lio_api.add_target( disk_meta, disk_meta.pool) """ wwn = self.calculate_disk_wwn(disk_meta) status = lio_api.add_target(disk_meta, wwn, disk_meta.pool) """ if Status.done == status: # enable the path we locked to true self.__last_acquire_succeeded = True lio_api.enable_path(disk_meta.iqn, path_index, True) logger.info("Path %s acquired successfully" % path) if self.__acquire_warning_counter > 2: logger.info( "PetaSAN finally succeeded to acquire path after retrying {} times." .format(self.__acquire_warning_counter)) self.__acquire_warning_counter = 0 path_assignment_info = self.__force_acquire_paths.get( path) if path_assignment_info: MangePathAssignment().update_path( path_obj.ip, ReassignPathStatus.succeeded) else: path_assignment_info = self.__force_acquire_paths.get( path) if path_assignment_info: logger.info( "Acquired forced path {}".format(path)) MangePathAssignment().update_path( path_obj.ip, ReassignPathStatus.failed) self.__last_acquire_succeeded = False if self.__acquire_warning_counter > 2: logger.warning( "PetaSAN failed to acquire path after {} times." 
.format(self.__acquire_warning_counter)) self.__acquire_warning_counter += 1 logger.error("Error could not acquire path %s" % path) else: self.__unlock_consul_path(path) except Exception as e: logger.info("---------------------------------") logger.error(str(e.message) + "\n") logger.exception(e) if str(e.message).find("invalid session") > -1: logger.error("Session is invalid") try: logger.info("Trying to create new session id") self.__session = ConsulAPI().get_new_session_ID( self.__session_name, self.__node_info.name) logger.info("New session id is {}".format(self.__session)) logger.info("Cleaning all mapped disks from old session") self.__clean() except Exception as ex: logger.exception(ex) logger.exception("Could not acquire path %s" % path) raise e logger.debug("End acquire path {}.".format(path)) return
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.
'''

from PetaSAN.core.common.enums import BondMode
import sys
from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.cluster.network import Network
from PetaSAN.core.common.cmd import *

logger.debug("Loading network configurations.")

network = Network()
config = configuration()
node = config.get_node_info()
cluster = config.get_cluster_info()

node_management_eth_name = network.get_node_management_interface()
node_management_vlan_id = network.get_node_management_vlan_id()
node_management_eth_ip = network.get_node_management_ip()
node_management_eth_netmask = network.get_node_management_netmask()
gateway = Network().get_def_gateway()
bonds = config.get_cluster_bonds()

jumbo_eths = []
if hasattr(configuration().get_cluster_info(), "jumbo_frames"):
    jumbo_eths = config.get_cluster_info().jumbo_frames
class Service3: __cluster_info = configuration().get_cluster_info() __node_info = configuration().get_node_info() __app_conf = ConfigAPI() __session_name = ConfigAPI().get_iscsi_service_session_name() __paths_local = set() __session = '0' __paths_per_disk_local = dict() __paths_per_session = dict() __total_cluster_paths = 0 __iqn_tpgs = dict() __local_ips = set() __backstore = set() __current_lock_index = None __image_name_prefix = "" __cluster_info = configuration().get_cluster_info() __node_info = configuration().get_node_info() __exception_sleep_time = 0 __acquire_warning_counter = 0 __last_acquire_succeeded = True __paths_consul_unlocked_firstborn = dict() __paths_consul_unlocked_siblings = dict() __paths_consul_locked_node = dict() __disk_consul_stopped = set() is_service_running = False def __init__(self): if Service3.is_service_running: logger.error("The service is already running.") raise Exception("The service is already running.") Service3.is_service_running = True def __del__(self): Service3.is_service_running = False def __do_process(self): self.__paths_local = set() self.__paths_per_disk_local = dict() self.__paths_per_session = dict() self.__iqn_tpgs = dict() self.__local_ips = set() self.__backstore = set() self.__paths_consul_unlocked_firstborn = dict() self.__paths_consul_unlocked_siblings = dict() self.__paths_consul_locked_node = dict() self.__disk_consul_stopped = set() self.__read_resources_local() self.__read_resources_consul() state_change = False def __read_resources_local(self): logger.debug("Start read local resources.") lio_api = LioAPI() try: self.__backstore = lio_api.get_backstore_image_names() self.__iqn_tpgs = lio_api.get_iqns_with_enabled_tpgs() for iqn, tpgs in self.__iqn_tpgs.iteritems(): disk_id = str(iqn).split(":")[1] for tpg_index, ips in tpgs.iteritems(): self.__paths_local.add("/".join([disk_id, str(tpg_index)])) if ips and len(ips) > 0: for ip in ips: self.__local_ips.add(ip) except Exception as e: logger.error("Could not read consul resources.") raise e logger.debug("End read local resources.") def __read_resources_consul(self): logger.debug("Start read resources consul.") self.__paths_per_session = {} self.__total_cluster_paths = 0 unlock_kvs = set() self.__paths_consul_locked_node = dict() try: disk_kvs = ConsulAPI().get_disk_kvs() for kv in disk_kvs: key = str(kv.Key).replace( self.__app_conf.get_consul_disks_path(), "") disk_id = str(key).split('/')[0] if disk_id in self.__disk_consul_stopped: continue if kv.Value == "disk": disk_id = str(key).split('/')[0] self.__paths_per_disk_local[disk_id] = 0 if str(kv.Flags) == "1": self.__disk_consul_stopped.add(disk_id) continue # Count paths in the cluster. 
self.__total_cluster_paths += 1 if hasattr(kv, "Session"): disk_id = str(key).split('/')[0] disks = self.__paths_consul_locked_node.get( kv.Session, dict()) paths = disks.get(disk_id, 0) disks[disk_id] = paths + 1 self.__paths_consul_locked_node[kv.Session] = disks # The count of paths for each session if self.__paths_per_session.has_key(kv.Session): count = self.__paths_per_session.get(kv.Session) self.__paths_per_session[kv.Session] = count + 1 else: self.__paths_per_session[kv.Session] = 1 if kv.Session == self.__session: self.__paths_consul_locked_node.add(key) disk_paths_count = self.__paths_per_disk_local.get( disk_id, 0) + 1 self.__paths_per_disk_local[disk_id] = disk_paths_count # unlocked paths elif not hasattr(kv, "Session"): unlock_kvs.add(kv) # Filter unlocked paths for kv in unlock_kvs: key = str(kv.Key).replace( self.__app_conf.get_consul_disks_path(), "") disk_id = str(key).split('/')[0] if self.__paths_per_disk_local.get(disk_id, 0) > 0: self.__paths_consul_unlocked_siblings[key] = kv.CreateIndex else: self.__paths_consul_unlocked_firstborn[ key] = kv.CreateIndex except Exception as e: logger.error("Could not read consul resources.") logger.exception(e) raise e logger.debug("End read resources consul.") def read(self): self.__read_resources_consul() nodes = ConsulAPI().get_sessions_dict(self.__session_name) print "########### First born #########" print print(self.__paths_consul_unlocked_firstborn)
def get_ceph_volumes_info(): ceph_volumes_disks = {} cluster_fsid = '' vg_name = "" partitions_uuid = {} try: cluster_fsid = ceph_disk.get_fsid(configuration().get_cluster_name()) partitions_uuid = ceph_disk.get_partitions_uuid() except Exception as e: logger.error(e) pass cmd = 'ceph-volume --log-path ' + CEPH_VOLUME_LOG_PATH + ' lvm list --format json' ret, stdout, stderr = exec_command_ex(cmd) if ret != 0: if stderr: logger.error(stderr) if len(stdout) > 0: ceph_volumes_info = json.loads(stdout) for osd_id, osd_info in ceph_volumes_info.iteritems(): try: ceph_volume_disk_info = CephVolumeInfo() fsid = '' osd_name = '' for element in osd_info: if element['type'] == 'block' or element['type'] == 'data': fsid = element['tags']['ceph.cluster_fsid'] if len(fsid) > 0 and fsid != cluster_fsid: continue # if not ['tags']['ceph.cluster_fsid'] or element['tags']['ceph.cluster_fsid'] != cluster_fsid: ceph_volume_disk_info.osd_id = osd_id ceph_volume_disk_info.osd_uuid = element['tags'][ 'ceph.osd_fsid'] if len(element['devices']) > 0: for device in element['devices']: part_name = ceph_disk.get_dev_name(device) osd_name = ceph_disk.get_device_name(part_name) ceph_volume_disk_info.devices.append(osd_name) # if there is no devices (physical disks) exists # get them from get_physical_disks function by volume group name else: vg_name = element['vg_name'] lv_name = element['lv_name'] ceph_volume_disk_info.lv_name = lv_name ceph_volume_disk_info.vg_name = vg_name physical_list = lvm_lib.get_physical_disks(vg_name) main_devices = list(physical_list["main"]) writecache_devices = list( physical_list["writecache"]) cache_devices = list(physical_list["dmcache"]) if len(main_devices) > 0: for main_dev in main_devices: main_part_name = ceph_disk.get_dev_name( main_dev) main_dev_name = ceph_disk.get_device_name( main_part_name) ceph_volume_disk_info.devices.append( main_dev_name) if len(writecache_devices) > 0: for wcache in writecache_devices: wcache_partition_name = ceph_disk.get_dev_name( wcache) ceph_volume_disk_info.linked_cache_part_num.append( ceph_disk.get_partition_num( wcache_partition_name)) ceph_volume_disk_info.linked_cache.append( wcache_partition_name) elif len(cache_devices) > 0: for cache in cache_devices: cache_partition_name = ceph_disk.get_dev_name( cache) ceph_volume_disk_info.linked_cache_part_num.append( ceph_disk.get_partition_num( cache_partition_name)) ceph_volume_disk_info.linked_cache.append( cache_partition_name) journal_path = "" # if 'ceph.journal_device' in element['tags']: # journal_path = element['tags']['ceph.journal_device'] # if 'ceph.db_device' in element['tags']: # journal_path = element['tags']['ceph.db_device'] uuid = "" # for filestore : if 'ceph.journal_uuid' in element['tags']: uuid = element['tags']['ceph.journal_uuid'] # for bluestore : if 'ceph.db_uuid' in element['tags']: uuid = element['tags']['ceph.db_uuid'] if len(uuid) > 0 and uuid in partitions_uuid: journal_path = partitions_uuid[uuid] if len(journal_path) > 0: try: if ceph_disk.is_partition(journal_path): journal_name = get_disk_by_partition( journal_path) journal_partition_name = ceph_disk.get_dev_name( journal_path) ceph_volume_disk_info.linked_journal_part_num = ceph_disk.get_partition_num( journal_partition_name) if len(osd_name ) > 0 and osd_name in journal_name: continue ceph_volume_disk_info.linked_journal = journal_name except Exception as ex: logger.error(ex) continue except Exception as e: logger.exception(e) continue for device in ceph_volume_disk_info.devices: ceph_volumes_disks.update({device: 
ceph_volume_disk_info}) return ceph_volumes_disks
        self.__dict__ = json.loads(j)


def set_cluster_interface(bonds=[]):
    if (bonds == None or len(bonds) == 0):
        return
    config = configuration()
    cluster_info = config.get_cluster_info()
    cluster_info.bonds = bonds
    config.set_cluster_network_info(cluster_info)
    logger.info("Updated cluster bonds to 1.3 successfully.")


try:
    old_bonds = configuration().get_cluster_bonds()
    new_bonds = []
    if len(old_bonds) > 0:
        for ob in old_bonds:
            if not hasattr(ob, "primary_eth_name"):
                sys.exit(0)
                break
            bond = Bond()
            bond.is_jumbo_frames = ob.is_jumbo_frames
            bond.mode = ob.mode
            bond.name = ob.name
            bond.interfaces = ob.primary_eth_name + "," + ob.other_eths_names
            bond.primary_interface = ob.primary_eth_name
            new_bonds.append(bond)
subprocess.call('rm -rf /opt/petasan/config/etc/hosts', shell=True)
subprocess.call('touch /opt/petasan/config/etc/hosts', shell=True)
subprocess.call('ln -s /opt/petasan/config/etc/hosts /etc/hosts', shell=True)

subprocess.call('rm -rf /etc/systemd/system/[email protected]/', shell=True)
subprocess.call('rm -rf /etc/systemd/system/[email protected]/', shell=True)
subprocess.call('rm -rf /etc/systemd/system/[email protected]/', shell=True)

os.makedirs("/etc/systemd/system/[email protected]/")
os.makedirs("/etc/systemd/system/[email protected]/")
os.makedirs("/etc/systemd/system/[email protected]/")

content = "[Service]\n\
Environment=CLUSTER={}"

with open("/etc/systemd/system/[email protected]/override.conf", 'w', ) as f:
    f.write(content.format(configuration().get_cluster_name()))

with open("/etc/systemd/system/[email protected]/override.conf", 'w', ) as f:
    f.write(content.format(configuration().get_cluster_name()))

with open("/etc/systemd/system/[email protected]/override.conf", 'w', ) as f:
    f.write(content.format(configuration().get_cluster_name()))

logger.info('End cleaning config files')

# ===============================================
logger.info("Starting ceph services")
if not call_cmd("systemctl start ceph.target"):
    logger.error("Can not start ceph services.")
    sys.exit(0)
def update_role(args):
    logger.info("Update roles.")
    config_api = ConfigAPI()
    try:
        cluster_config = configuration()
        node_info = cluster_config.get_node_info()
        is_dirty = False

        if str(args.is_storage) == '-1' and str(args.is_iscsi) == '-1':
            return

        if str(args.is_storage) == "1":
            if not node_info.is_storage:
                logger.info("Update roles 1.")
                node_info.is_storage = True
                cluster_config.update_node_info(node_info)
                is_dirty = True
                logger.info("Update node storage role to true")
        else:
            # ToDO
            pass

        if str(args.is_backup) == "1":
            if not node_info.is_backup:
                node_info.is_backup = True
                cluster_config.update_node_info(node_info)
                is_dirty = True
                logger.info("Update node backup role to true")
        else:
            # ToDO
            pass

        if str(args.is_iscsi) == "1":
            if not node_info.is_iscsi:
                node_info.is_iscsi = True
                cluster_config.update_node_info(node_info)
                is_dirty = True
                logger.info("Update node iscsi role to true")

                path = config_api.get_service_files_path()
                logger.info("Starting PetaSAN service")
                cmd = "python {}{} >/dev/null 2>&1 &".format(path, config_api.get_petasan_service())
                exec_command(cmd)
        else:
            # ToDO
            pass

        if is_dirty:
            consul_base_api = BaseAPI()
            consul_base_api.write_value(
                config_api.get_consul_nodes_path() + cluster_config.get_node_name(),
                cluster_config.get_node_info().write_json())
        print 1
    except Exception as ex:
        logger.exception(ex.message)
        print -1
    return
def run(args):
    value = int(args.value)
    cluster_name = configuration().get_cluster_name()

    if value == 1:
        print('backfill_speed choice : very slow')
        ret1, stdout1, stderr1 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_max_backfills 1' --cluster " + cluster_name)
        ret2, stdout2, stderr2 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_max_active 1' --cluster " + cluster_name)
        ret3, stdout3, stderr3 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_sleep 2' --cluster " + cluster_name)
        print('Done changing backfill speed to very slow ...')

    elif value == 2:
        print('backfill_speed choice : slow')
        ret1, stdout1, stderr1 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_max_backfills 1' --cluster " + cluster_name)
        ret2, stdout2, stderr2 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_max_active 1' --cluster " + cluster_name)
        ret3, stdout3, stderr3 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_sleep 0.7' --cluster " + cluster_name)
        print('Done changing backfill speed to slow ...')

    elif value == 3:
        print('backfill_speed choice : medium')
        ret1, stdout1, stderr1 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_max_backfills 1' --cluster " + cluster_name)
        ret2, stdout2, stderr2 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_max_active 1' --cluster " + cluster_name)
        ret3, stdout3, stderr3 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_sleep 0' --cluster " + cluster_name)
        print('Done changing backfill speed to medium ...')

    elif value == 4:
        print('backfill_speed choice : fast')
        ret1, stdout1, stderr1 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_max_backfills 5' --cluster " + cluster_name)
        ret2, stdout2, stderr2 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_max_active 3' --cluster " + cluster_name)
        ret3, stdout3, stderr3 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_sleep 0' --cluster " + cluster_name)
        print('Done changing backfill speed to fast ...')

    elif value == 5:
        print('backfill_speed choice : very fast')
        ret1, stdout1, stderr1 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_max_backfills 7' --cluster " + cluster_name)
        ret2, stdout2, stderr2 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_max_active 7' --cluster " + cluster_name)
        ret3, stdout3, stderr3 = exec_command_ex(
            "ceph tell osd.* injectargs '--osd_recovery_sleep 0' --cluster " + cluster_name)
        print('Done changing backfill speed to very fast ...')

    else:
        print("error : failed to set speed value : No such value {}".format(value))

    return
def get_ceph_disk_list(): disk_info_list = [] # read fsid for our cluster from config file fsid = None try: fsid = ceph_disk.get_fsid(configuration().get_cluster_name()) except Exception as e: pass journal_linked_osds = {} counter = 0 while True: try: ceph_disk_list_devs = ceph_disk.list_devices() break except Exception as e: if counter == 120: return disk_info_list counter += 1 logger.error(e) time.sleep(1) for device in ceph_disk_list_devs: no_of_partitions = 0 no_of_availabe_partitions = 0 path = device['path'] name = ceph_disk.get_dev_name(path) # check for disk block devices if not name.startswith('sd') and not name.startswith( 'xvd') and not name.startswith('nvme'): continue di = DiskInfo() disk_info_list.append(di) di.name = name di.usage = DiskUsage.no # check if disk is not partitioned if 'partitions' not in device: continue old_osd = False # first check for OSD partitions for partition in device['partitions']: if partition['ptype'] == ptype_osd and 'whoami' in partition: if fsid and partition['ceph_fsid'] == fsid: di.usage = DiskUsage.osd di.osd_id = partition['whoami'] di.osd_uuid = partition['uuid'] if 'journal_dev' in partition: journal = partition['journal_dev'] journal_disk = get_disk_by_partition(journal) if journal_disk != name: di.linked_journal = journal_disk if journal_disk not in journal_linked_osds: journal_linked_osds[journal_disk] = [] journal_linked_osds[journal_disk].append(di.name) if 'block.db_dev' in partition: journal = partition['block.db_dev'] journal_disk = get_disk_by_partition(journal) if journal_disk != name: di.linked_journal = journal_disk if journal_disk not in journal_linked_osds: journal_linked_osds[journal_disk] = [] journal_linked_osds[journal_disk].append(di.name) # do not check further partitons break else: old_osd = True if di.usage == DiskUsage.osd: # go to next disk continue # check for journal disk if not old_osd: no_of_partitions = len(device['partitions']) for partition in device['partitions']: if partition['ptype'] == ptype_journal or partition[ 'ptype'] == ptype_blockdb or partition[ 'ptype'] == journal_avail_ptype: di.usage = DiskUsage.journal if partition['ptype'] == journal_avail_ptype: no_of_availabe_partitions += 1 """ if 'journal_for' in partition: journal_for = partition['journal_for'] journal_for_disk = get_disk_by_partition(journal_for) di.linked_osds.append(journal_for_disk) """ # check for cache partition if partition['ptype'] == cache_used_ptype or partition[ 'ptype'] == cache_avail_ptype: di.usage = DiskUsage.cache if partition['ptype'] == cache_avail_ptype: no_of_availabe_partitions += 1 if di.usage == DiskUsage.journal or di.usage == DiskUsage.cache: if di.usage == DiskUsage.cache and no_of_partitions > 0: di.no_of_partitions = no_of_partitions di.no_available_partitions = no_of_availabe_partitions # go to next disk continue # check for mounted partitions for partition in device['partitions']: if 'mount' in partition: mount_path = partition['mount'] if mount_path is not None and 0 < len(mount_path): di.usage = DiskUsage.mounted # check for system disk if mount_path == '/': di.usage = DiskUsage.system break for di in disk_info_list: if di.usage == DiskUsage.journal and di.name in journal_linked_osds: di.linked_osds = journal_linked_osds[di.name] return disk_info_list
def get_full_disk_list(pid=None): __output_split_text = "##petasan##" disk_list = [] ceph_disk_list = get_disk_list() ph_disk_list = disk_util.get_disk_list() osd_dict = None try: osd_dict = ceph_osd.ceph_osd_tree(configuration().get_node_info().name) except Exception as e: logger.error(e.message) missing_disk_list = [] # Set osd id and usage if ceph_disk_list and len(ceph_disk_list) > 0: for disk in ceph_disk_list: for ph_disk in ph_disk_list: if ph_disk.name == disk.name: ph_disk.usage = disk.usage ph_disk.osd_id = disk.osd_id ph_disk.osd_uuid = disk.osd_uuid ph_disk.linked_journal = disk.linked_journal ph_disk.linked_osds = disk.linked_osds ph_disk.linked_cache = disk.linked_cache ph_disk.linked_cache_part_num = disk.linked_cache_part_num ph_disk.vg_name = disk.vg_name ph_disk.lv_name = disk.lv_name ph_disk.linked_journal_part_num = disk.linked_journal_part_num ph_disk.no_of_partitions = disk.no_of_partitions ph_disk.no_available_partitions = disk.no_available_partitions disk_list.append(ph_disk) break else: disk_list.extend(ph_disk_list) health_test = Smart().get_overall_health() for disk in disk_list: if disk.name in health_test: disk.smart_test = health_test[disk.name] # get all running jobs job_manager = JobManager() job_list = job_manager.get_running_job_list() # Set disk osd status for node_disk in disk_list: # Set osd status [up, down] if node_disk.usage == DiskUsage.osd: status = None if osd_dict and node_disk.osd_id is not None: status = osd_dict.get(int(node_disk.osd_id), None) if str(ceph_osd.get_osd_id(node_disk.osd_uuid)) == "-1": node_disk.status = OsdStatus.no_status node_disk.usage = DiskUsage.mounted node_disk.osd_id = -1 elif status is not None: node_disk.status = status else: node_disk.status = OsdStatus.no_status disk_name_parameter = "-disk_name {}".format(node_disk.name) disk_id_parameter = "-id {}".format(node_disk.osd_id) # loop on running job list for j in job_list: # Set osd status [deleting , adding] if j.type == JobType.ADDDISK and str(j.params).find( str(disk_name_parameter)) > -1: node_disk.status = OsdStatus.adding elif j.type == JobType.ADDJOURNAL and str(j.params).find( str(disk_name_parameter)) > -1: node_disk.status = OsdStatus.adding_journal elif j.type == JobType.ADDCACHE and str(j.params).find( str(disk_name_parameter)) > -1: node_disk.status = OsdStatus.adding_cache elif j.type == JobType.DELETEOSD and ( str(j.params).find(str(disk_name_parameter)) > -1 or str(j.params).find(str(disk_id_parameter)) > -1): node_disk.status = OsdStatus.deleting elif j.type == JobType.DELETEJOURNAL and str(j.params).find( str(disk_name_parameter)) > -1: node_disk.status = OsdStatus.deleting elif j.type == JobType.DELETECACHE and str(j.params).find( str(disk_name_parameter)) > -1: node_disk.status = OsdStatus.deleting # Check if the job completed and has error to return it elif pid and j.id == int(pid): job_output = job_manager.get_job_output(j) if job_output is None: continue job_output = str(job_output).strip() if job_output != "": # We expect our custom messages appear after __output_split_text. out_arr = job_output.split(__output_split_text) if out_arr > 1: node_disk.error_message = out_arr[1] job_manager.remove_job(j.id) if not osd_dict or len(osd_dict.items()) == 0: return disk_list # If there is an osd found in ceph tree and this osd not has disk. 
    for osd_id, osd_status in osd_dict.items():
        is_missing = True
        for disk in disk_list:
            if str(disk.osd_id) == str(osd_id):
                is_missing = False
                break
        if is_missing:
            disk = DiskInfo()
            disk.osd_id = osd_id
            disk.status = osd_status
            disk.usage = DiskUsage.osd
            missing_disk_list.append(disk)

    disk_list.extend(missing_disk_list)
    return disk_list
class Service: __cluster_info = configuration().get_cluster_info() __node_info = configuration().get_node_info() __app_conf = ConfigAPI() __session_name = ConfigAPI().get_iscsi_service_session_name() __paths_local = set() __session = '0' __paths_per_disk_local = dict() __paths_per_session = dict() __total_cluster_paths = 0 __iqn_tpgs = dict() __local_ips = set() __backstore = set() __current_lock_index = None __image_name_prefix = "" __cluster_info = configuration().get_cluster_info() __node_info = configuration().get_node_info() __exception_retry_timeout = 0 __failure_timeout = timedelta(minutes=5) + datetime.utcnow() __acquire_warning_counter = 0 __last_acquire_succeeded = True __paths_consul_unlocked_firstborn = dict() __paths_consul_unlocked_siblings = dict() __paths_consul_locked_node = set() __disk_consul_stopped = set() __ignored_acquire_paths = dict() __force_acquire_paths = dict() is_service_running = False def __init__(self): if Service.is_service_running: logger.error("The service is already running.") raise Exception("The service is already running.") Service.is_service_running = True def start(self): self.__image_name_prefix = self.__app_conf.get_image_name_prefix() # Handel the case of cluster has just started if self.__node_info.is_management: clean_thread = threading.Thread(target=self.handle_cluster_startup) clean_thread.start() logger.info("Service is starting.") keep_resources_flag_path = ConfigAPI().get_keep_resources_flag_path() keep_resources_flag = False clean = True #check if file path exist update keep_resources_flag to be True and remove the file if os.path.exists(keep_resources_flag_path): keep_resources_flag = True os.remove(keep_resources_flag_path) # check if no upgrade needed , then get new session if not keep_resources_flag: try: self.__session = ConsulAPI().get_new_session_ID( self.__session_name, self.__node_info.name) except Exception as e: logger.error(e) self.__session = "0" if not self.__session or self.__session is None: self.__session = "0" # check if upgrade needed, then use the current session to keep consul resource else: keep_resources_flag = False try: sessions = ConsulAPI().get_sessions_dict( 'iSCSITarget', self.__node_info.name) if sessions is not None and len(sessions) == 1: consul_session = sessions.values()[0] self.__session = consul_session.ID clean = False else: self.__session = "0" except Exception as ex: logger.error("Could not get Consul sessions") logger.exception(ex) self.__session = "0" if clean: self.__clean() while True: try: if self.__session == "0": self.__session = ConsulAPI().get_new_session_ID( self.__session_name, self.__node_info.name) consul_api = ConsulAPI() self.__current_lock_index = consul_api.current_index() if not self.__current_lock_index: sleep(1) continue self.__process() old_index = self.__current_lock_index self.__current_lock_index = consul_api.watch( self.__current_lock_index) if old_index != self.__current_lock_index: # Give a chance to get all changes that occurred in the same time in cosnul. 
sleep(2) self.__exception_retry_timeout = 0 self.__failure_timeout = timedelta( minutes=self.__app_conf.get_failure_timeout_duration_min( )) + datetime.utcnow() except (ConnectionError, RetryConsulException) as ex: logger.error("Error on consul connection.") logger.exception(ex) self.__exception_retry_timeout += 5 except Exception as ex: logger.error("Error during process.") logger.exception(ex) self.__exception_retry_timeout += 1 sleep(self.__exception_retry_timeout) if self.__exception_retry_timeout > 10: logger.warning( "PetaSAN could not complete process, there are too many exceptions." ) self.__exception_retry_timeout = 1 sleep(self.__exception_retry_timeout) # Clean all installed configurations if service did not successfully for 5 minutes. if self.__failure_timeout < datetime.utcnow(): logger.warning( "There are too many exceptions.Service will clean this node." ) self.__clean() self.__session = "0" self.__failure_timeout = timedelta( minutes=self.__app_conf.get_failure_timeout_duration_min( )) + datetime.utcnow() def __process(self): logger.debug("Start process, node session id is {}.".format( self.__session)) self.__last_acquire_succeeded = True self.__ignored_acquire_paths = dict() while self.__do_process() != True: pass logger.debug("End process.") def __do_process(self): self.__paths_local = set() self.__paths_per_disk_local = dict() self.__paths_per_session = dict() self.__iqn_tpgs = dict() self.__local_ips = set() self.__backstore = set() self.__paths_consul_unlocked_firstborn = dict() self.__paths_consul_unlocked_siblings = dict() self.__paths_consul_locked_node = set() self.__disk_consul_stopped = set() self.__force_acquire_paths = dict() self.__read_resources_local() self.__read_resources_consul() state_change = False # ====== Step 1: delete any local paths not locked by us in consul ====== for path in self.__paths_local: if path not in self.__paths_consul_locked_node: state_change = True self.__clean_local_path(path) if state_change: logger.info( "PetaSAN cleaned local paths not locked by this node in consul." ) return False # refresh and reprocess # ====== Step 2: remove any consul locks we have but not configured locally ====== for path in self.__paths_consul_locked_node: if path not in self.__paths_local: state_change = True self.__unlock_consul_path(path) if state_change: logger.info( "PetaSAN unlocked any consul locks not configured in this node." 
) return False # refresh and reprocess # ====== Step 3: handle stopped disks ====== for disk in self.__disk_consul_stopped: self.__stop_disk(disk) # ====== Step 4: Clean any unused iqns ====== if self.__clean_unused_iqns(): logger.info("PetaSAN cleaned iqns.") return False # refresh and reprocess # ====== Step 5: Clean any unused rbd backstores ====== if self.__clean_unused_rbd_backstore(): logger.info("PetaSAN Cleaned rbd backstores.") return False # refresh and reprocess # ====== Step 6: Clean any unused ips ====== self.__clean_unused_ips() # ====== Step 7: Clean any unused mapped rbd images ====== self.__clean_unused_rbd_images() # ====== Step 8: try to acquire unlocked paths ====== if len(self.__force_acquire_paths) > 0: path, value = self.__force_acquire_paths.items()[0] if path: self.__acquire_path(str(path), value) return False if len(self.__paths_consul_unlocked_firstborn) > 0: path = random.sample(self.__paths_consul_unlocked_firstborn, 1)[0] self.__wait_before_lock(path) self.__acquire_path( str(path), self.__paths_consul_unlocked_firstborn.get(path)) return False if len(self.__paths_consul_unlocked_siblings) > 0: path = random.sample(self.__paths_consul_unlocked_siblings, 1)[0] self.__wait_before_lock(path) self.__acquire_path( str(path), self.__paths_consul_unlocked_siblings.get(path)) return False return True def __read_resources_local(self): logger.debug("Start read local resources.") lio_api = LioAPI() try: self.__backstore = lio_api.get_backstore_image_names() self.__iqn_tpgs = lio_api.get_iqns_with_enabled_tpgs() for iqn, tpgs in self.__iqn_tpgs.iteritems(): disk_id = str(iqn).split(":")[1] for tpg_index, ips in tpgs.iteritems(): self.__paths_local.add("/".join([disk_id, str(tpg_index)])) if ips and len(ips) > 0: for ip in ips: self.__local_ips.add(ip) except Exception as e: logger.error("Could not read consul resources.") raise e logger.debug("End read local resources.") def __read_resources_consul(self): logger.debug("Start read resources consul.") self.__paths_per_session = {} self.__total_cluster_paths = 0 unlock_kvs = set() consul_api = ConsulAPI() try: disk_kvs = consul_api.get_disk_kvs() for kv in disk_kvs: key = str(kv.Key).replace( self.__app_conf.get_consul_disks_path(), "") disk_id = str(key).split('/')[0] if disk_id in self.__disk_consul_stopped: continue if kv.Value == "disk": disk_id = str(key).split('/')[0] self.__paths_per_disk_local[disk_id] = 0 if str(kv.Flags) == "1": self.__disk_consul_stopped.add(disk_id) continue # Count paths in the cluster. 
self.__total_cluster_paths += 1 if hasattr(kv, "Session"): # locked paths if kv.Session == self.__session: self.__paths_consul_locked_node.add(key) disk_paths_count = self.__paths_per_disk_local.get( disk_id, 0) + 1 self.__paths_per_disk_local[disk_id] = disk_paths_count # Total count of paths for each session if self.__paths_per_session.has_key(kv.Session): count = self.__paths_per_session.get(kv.Session) self.__paths_per_session[kv.Session] = count + 1 else: self.__paths_per_session[kv.Session] = 1 # unlocked paths elif not hasattr(kv, "Session"): unlock_kvs.add(kv) # Filter unlocked paths reassignments = None if len(unlock_kvs) > 0: reassignments = MangePathAssignment().get_forced_paths() for kv in unlock_kvs: key = str(kv.Key).replace( self.__app_conf.get_consul_disks_path(), "") if reassignments: path_assignment_info = reassignments.get(key) if path_assignment_info and path_assignment_info.target_node == self.__node_info.name: self.__force_acquire_paths[key] = kv continue else: self.__ignored_acquire_paths[key] = kv continue disk_id = str(key).split('/')[0] if self.__paths_per_disk_local.get(disk_id, 0) > 0: self.__paths_consul_unlocked_siblings[key] = kv else: self.__paths_consul_unlocked_firstborn[key] = kv except Exception as e: logger.error("Could not read consul resources.") logger.exception(e) raise e logger.debug("End read resources consul.") def __clean_local_path(self, path): disk_id, path_index = str(path).split("/") logger.debug("Start clean disk path {}.".format(path)) image_name = self.__image_name_prefix + str(disk_id) ceph_api = CephAPI() lio_api = LioAPI() network_api = NetworkAPI() try: # Get iqn. logger.debug("Start get disk meta to clean path {}.".format(path)) # iqn = ceph_api.get_disk_meta(disk_id, pool).iqn iqn = self._get_iqn_by_disk(disk_id) logger.debug("End get disk meta to clean path {}.".format(path)) # Get tpgs for iqn. tpgs = self.__iqn_tpgs.get(iqn, None) if not iqn or not tpgs or len(tpgs) == 0: logger.info("Could not find ips for %s " % image_name) # Remove the assigned ips from our interfaces elif tpgs and len(tpgs) > 0: # Get assigned ips for each path. for tpg, ips in tpgs.iteritems(): if tpg == path_index: for ip in ips: logger.debug( "Delete ip {} to clean path {}.".format( ip, path)) if not network_api.delete_ip( ip, self.__cluster_info.iscsi_1_eth_name): network_api.delete_ip( ip, self.__cluster_info.iscsi_2_eth_name) lio_api.disable_path(iqn, path_index) logger.info("Cleaned disk path {}.".format(path)) break except Exception as e: logger.error("Could not clean disk path for %s" % image_name) raise e logger.debug("End clean disk path {}.".format(path)) return # If all tpgs related to iqn are disable, system will remove iqn. 
def __clean_unused_iqns(self): status = False lio_api = LioAPI() for iqn in lio_api.get_unused_iqns(): disk_id = str(iqn).split(":")[1] image_name = self.__image_name_prefix + str(disk_id) lio_api.delete_target(image_name, iqn) CephAPI().unmap_image(image_name) status = True logger.debug("Clean unused iqn {}.".format(iqn)) return status def __clean_unused_rbd_backstore(self): status = False iqns = self.__iqn_tpgs.keys() for rbd_backstore in self.__backstore: rbd_backstore_disk_id = str(rbd_backstore).replace( self.__image_name_prefix, "") is_used = False for iqn in iqns: disk_id = str(iqn).split(":")[1] if disk_id == rbd_backstore_disk_id: is_used = True break if not is_used: LioAPI().delete_backstore_image(rbd_backstore) logger.debug( "Clean unused lio backstore {}.".format(rbd_backstore)) status = True return status def __clean_unused_ips(self): ips = Network().get_all_configured_ips() for ip, eth_name in ips.iteritems(): ip, netmask = str(ip).split("/") if ip not in self.__local_ips and ip != self.__node_info.backend_1_ip and \ ip != self.__node_info.backend_2_ip and ip != self.__node_info.management_ip: NetworkAPI().delete_ip(ip, eth_name, netmask) logger.debug("Clean unused ip {} on interface {}.".format( ip, eth_name)) def __clean_unused_rbd_images(self): ceph_api = CephAPI() rbd_images = ceph_api.get_mapped_images() if rbd_images is None: return for image, mapped_count in rbd_images.iteritems(): if image not in self.__backstore: if int(mapped_count) > 0: for i in range(0, int(mapped_count)): ceph_api.unmap_image(image) logger.debug("Unmapped unused image {}.".format(image)) def __unlock_consul_path(self, path): try: logger.debug("Unlock {} path locked by session {}.".format( path, self.__session)) consul_api = ConsulAPI() consul_api.release_disk_path( self.__app_conf.get_consul_disks_path() + path, self.__session, None) logger.info("Unlock path %s" % path) except Exception as e: logger.error("Could not unlock path %s" % path) raise e def __stop_disk(self, disk_id): consul_api = ConsulAPI() ceph_api = CephAPI() lio_api = LioAPI() network_api = NetworkAPI() logger.info("Stopping disk %s" % disk_id) image_name = self.__image_name_prefix + str(disk_id) try: # Get iqn. #iqn = ceph_api.get_disk_meta(disk_id, pool).iqn iqn = self._get_iqn_by_disk(disk_id) # Get tpgs for iqn. tpgs = self.__iqn_tpgs.get(iqn, None) if not iqn or not tpgs or len(tpgs) == 0: logger.error("Could not find ips for %s " % image_name) # Remove the assigned ips from our interfaces elif tpgs and len(tpgs) > 0: # Get assigned ips for each path. for tpg, ips in tpgs.iteritems(): for ip in ips: if not network_api.delete_ip( ip, self.__cluster_info.iscsi_1_eth_name): network_api.delete_ip( ip, self.__cluster_info.iscsi_2_eth_name) lio_api.delete_target(image_name, iqn) ceph_api.unmap_image(image_name) sleep(2) pool = self._get_pool_by_disk(disk_id) if not pool: logger.error('Could not find pool for disk ' + disk_id) return if not ceph_api.is_image_busy(image_name, pool): consul_api.delete_disk( self.__app_conf.get_consul_disks_path() + disk_id, None, True) logger.info( "PetaSAN removed key of stopped disk {} from consul.". 
format(disk_id)) except Exception as e: logger.info("Could not stop disk %s" % disk_id) return def __acquire_path(self, path, consul_kv): if self.__ignored_acquire_paths.get(path): logger.info("Ignore forced path {}".format(path)) return logger.debug("Start acquire path {} by node session {}.".format( path, self.__session)) consul_api = ConsulAPI() ceph_api = CephAPI() lio_api = LioAPI() network_api = NetworkAPI() config = configuration() try: disk_id, path_index = str(path).split("/") pool = self._get_pool_by_disk(disk_id) if not pool: logger.error('Could not find pool for disk ' + disk_id) return image_name = self.__image_name_prefix + disk_id logger.debug( "Start read image meta for acquire path {}.".format(path)) all_image_meta = ceph_api.read_image_metadata(image_name, pool) petasan_meta = all_image_meta.get( self.__app_conf.get_image_meta_key()) disk_meta = DiskMeta() disk_meta.load_json(petasan_meta) logger.debug( "End read image meta for acquire path {}.".format(path)) logger.debug("Try to acquire path {}.".format(path)) node_name = config.get_node_name() result = consul_api.lock_disk_path( self.__app_conf.get_consul_disks_path() + path, self.__session, node_name, str(consul_kv.CreateIndex)) if not result: logger.info("Could not lock path {} with session {}.".format( path, self.__session)) elif result: if consul_kv.Value != None and len(str( consul_kv.Value)) > 0 and node_name != str( consul_kv.Value): logger.info("The path {} was locked by {}.".format( path, str(consul_kv.Value))) logger.debug("Node {} will kill node {}.".format( config.get_node_name(), str(consul_kv.Value))) self.__fencing(str(consul_kv.Value)) # we locked it if disk_meta.paths: # if lio has the image name in its backstore already, do not perform rbd mapping if image_name not in self.__backstore: status = ceph_api.map_iamge(image_name, pool) else: status = Status.done if Status.done == status: # Get path info from metadata path_obj = disk_meta.get_paths()[int(path_index) - 1] # add path ips to our network interfaces network_api.add_ip(path_obj.ip, path_obj.subnet_mask, path_obj.eth, path_obj.vlan_id) #update neighbors arp table network_api.update_neighbors_arp( path_obj.ip, path_obj.eth) # add new target in lio if not there already if not lio_api.is_backstore_image_found(image_name): # Give ceph map image complete it job sleep(3) # Add rbd backstores and target status = lio_api.add_target( disk_meta, disk_meta.pool) """ wwn = self.calculate_disk_wwn(disk_meta) status = lio_api.add_target(disk_meta, wwn, disk_meta.pool) """ if Status.done == status: # enable the path we locked to true self.__last_acquire_succeeded = True lio_api.enable_path(disk_meta.iqn, path_index, True) logger.info("Path %s acquired successfully" % path) if self.__acquire_warning_counter > 2: logger.info( "PetaSAN finally succeeded to acquire path after retrying {} times." .format(self.__acquire_warning_counter)) self.__acquire_warning_counter = 0 path_assignment_info = self.__force_acquire_paths.get( path) if path_assignment_info: MangePathAssignment().update_path( path_obj.ip, ReassignPathStatus.succeeded) else: path_assignment_info = self.__force_acquire_paths.get( path) if path_assignment_info: logger.info( "Acquired forced path {}".format(path)) MangePathAssignment().update_path( path_obj.ip, ReassignPathStatus.failed) self.__last_acquire_succeeded = False if self.__acquire_warning_counter > 2: logger.warning( "PetaSAN failed to acquire path after {} times." 
.format(self.__acquire_warning_counter)) self.__acquire_warning_counter += 1 logger.error("Error could not acquire path %s" % path) else: self.__unlock_consul_path(path) except Exception as e: logger.info("---------------------------------") logger.error(str(e.message) + "\n") logger.exception(e) if str(e.message).find("invalid session") > -1: logger.error("Session is invalid") try: logger.info("Trying to create new session id") self.__session = ConsulAPI().get_new_session_ID( self.__session_name, self.__node_info.name) logger.info("New session id is {}".format(self.__session)) logger.info("Cleaning all mapped disks from old session") self.__clean() except Exception as ex: logger.exception(ex) logger.exception("Could not acquire path %s" % path) raise e logger.debug("End acquire path {}.".format(path)) return def __clean(self): logger.info("Cleaning unused configurations. ") logger.info("Cleaning all mapped disks") ceph_api = CephAPI() lio_api = LioAPI() network_api = NetworkAPI() # Get tpgs of each iqn for iqn, tpgs in lio_api.get_iqns_with_tpgs().iteritems(): try: disk_id = str(iqn).split(":")[1] # Get assigned ips for each tpg for tpg, ips in tpgs.iteritems(): if ips and len(ips) > 0: for ip in ips: # 1- Remove ip from network interface. if not network_api.delete_ip( ip, self.__cluster_info.iscsi_1_eth_name): network_api.delete_ip( ip, self.__cluster_info.iscsi_2_eth_name) # 2- Delete iqn ,delete image from rbd backstore and unmap image. image_name = self.__image_name_prefix + str(disk_id) lio_api.delete_target(image_name, iqn) ceph_api.unmap_image(image_name) except Exception as e: logger.error("Error cleaning all mapped disks, disk %s " % image_name) logger.exception(e.message) # 3- From backstore for image_name in lio_api.get_backstore_image_names(): try: lio_api.delete_backstore_image(image_name) ceph_api.unmap_image(image_name) except Exception as e: logger.error("Error cleaning all mapped disks, disk %s " % image_name) logger.info("Cleaning unused rbd images.") try: self.__clean_unused_rbd_images() except: logger.error("Error cleaning unused rbd images.") logger.info("Cleaning unused ips.") try: self.__local_ips = set() self.__clean_unused_ips() except: logger.error("Cleaning unused ips.") def __wait_before_lock(self, path=None): disk_id, path_index = str(path).split("/") wait_time = 0 if path: # 1- Calc wait time if path has siblings. wait_time = int(self.__app_conf.get_siblings_paths_delay()) * int( self.__paths_per_disk_local.get(disk_id, 0)) logger.debug("Wait time for siblings is {}.".format(wait_time)) total_nodes = len(ConsulAPI().get_consul_members()) # 2- Calc average paths per node. average_node_paths = float( self.__total_cluster_paths) / float(total_nodes) # Calc the percent of local paths according to average paths. percent = float(self.__paths_per_session.get(self.__session, 0)) / average_node_paths # 3- Calc total wait time if self.__last_acquire_succeeded: wait_time += int( self.__app_conf.get_average_delay_before_lock()) * percent else: logger.debug("Skipping wait time for average delay.") logger.debug( "Wait time depending on average and siblings is {}.".format( math.ceil(wait_time))) sleep(math.ceil(wait_time)) def __wait_after_lock(self): pass def __fencing(self, node_name): maintenance = ManageMaintenance() if maintenance.get_maintenance_config( ).fencing == MaintenanceConfigState.off: logger.warning( "Fencing action will not fire the admin stopped it,the cluster is in maintenance mode." 
) return node_list = ConsulAPI().get_node_list() for node in node_list: if str(node.name) == node_name: if Network().ping(node.backend_2_ip): logger.info("This node will stop node {}/{}.".format( node_name, node.backend_2_ip)) ssh().call_command(node.backend_2_ip, " poweroff ", 5) break elif Network().ping(node.management_ip): logger.info("This node will stop node {}/{}.".format( node_name, node.management_ip)) ssh().call_command(node.management_ip, " poweroff ", 5) break elif Network().ping(node.backend_1_ip): logger.info("This node will stop node {}/{}.".format( node_name, node.backend_1_ip)) ssh().call_command(node.backend_1_ip, " poweroff ", 5) break def handle_cluster_startup(self): i = 0 consul_api = ConsulAPI() logger.debug("Check cluster startup.") while True: try: current_node_name = self.__node_info.name result = consul_api.set_leader_startup_time( current_node_name, str(i)) if i == 0 and not result: sleep(2) continue elif result: # value returned, consul is up and running sleep(2) number_of_started_nodes = 0 for kv in consul_api.get_leaders_startup_times(): node_name = str(kv.Key).replace( ConfigAPI().get_consul_leaders_path(), "") if node_name != current_node_name: if int(kv.Value) == 0: number_of_started_nodes += 1 logger.debug("Number of started nodes = {}.".format( number_of_started_nodes)) # Another management node is just starting if i == 0 and number_of_started_nodes > 0: logger.info( "Cluster is just starting, system will delete all active disk resources" ) consul_api.delete_disk( ConfigAPI().get_consul_disks_path(), recurse=True) i += 1 sleep(58) except Exception as ex: logger.debug("Start up error") logger.exception(ex) # maybe other management nodes are starting, give them a chance to start if i == 0: sleep(2) else: i += 1 sleep(58) def _get_pool_by_disk(self, disk_id): consul_api = ConsulAPI() ceph_api = CephAPI() pool = consul_api.get_disk_pool(disk_id) if pool: logger.info('Found pool:{} for disk:{} via consul'.format( pool, disk_id)) return pool pool = ceph_api.get_pool_bydisk(disk_id) if pool: logger.info('Found pool:{} for disk:{} via ceph'.format( pool, disk_id)) return pool logger.error('Could not find pool for disk ' + disk_id) return None def _get_iqn_by_disk(self, disk_id): for iqn in self.__iqn_tpgs: if disk_id == str(iqn).split(":")[1]: return iqn return None
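# The cleanup and lookup methods above (__clean_unused_iqns, __clean_unused_rbd_backstore,
# _get_iqn_by_disk) all derive a disk id from an iSCSI IQN by taking the text after the
# ':' and prepend an image-name prefix to get the backing rbd image. A minimal,
# self-contained sketch of that naming convention (the "image-" prefix matches the one
# used by clear_disk further below; the sample IQN is hypothetical):

IMAGE_NAME_PREFIX = "image-"   # assumed prefix, same value clear_disk uses below

def disk_id_from_iqn(iqn):
    """Return the disk id part of an IQN such as 'iqn.2016-05.com.petasan:00001'."""
    return str(iqn).split(":")[1]

def image_name_from_iqn(iqn):
    """Return the rbd image name that backs the given IQN."""
    return IMAGE_NAME_PREFIX + disk_id_from_iqn(iqn)

if __name__ == "__main__":
    sample_iqn = "iqn.2016-05.com.petasan:00001"   # hypothetical IQN
    assert disk_id_from_iqn(sample_iqn) == "00001"
    assert image_name_from_iqn(sample_iqn) == "image-00001"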
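# __acquire_path above relies on Consul session-based locking: a key under the disks
# path is acquired with the node's session id, so only one node can own a path at a
# time, and __unlock_consul_path releases it. A minimal sketch of that pattern using
# the python-consul package directly, purely for illustration; PetaSAN wraps this in
# its own ConsulAPI (lock_disk_path / release_disk_path), so the helpers below are
# not the project's actual API:

import consul

def try_acquire(key, node_name, session_id, host="127.0.0.1"):
    """Attempt to acquire `key` for this node; returns True if the lock was taken."""
    c = consul.Consul(host=host)
    # put() with acquire= succeeds only if the key is not held by another session
    return c.kv.put(key, node_name, acquire=session_id)

def release(key, node_name, session_id, host="127.0.0.1"):
    """Release a previously acquired key, mirroring __unlock_consul_path above."""
    c = consul.Consul(host=host)
    return c.kv.put(key, node_name, release=session_id)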
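# __wait_before_lock above staggers lock attempts so that nodes which already carry
# many paths back off longer before competing for a new one. A pure-function sketch
# of the same arithmetic (parameter names are illustrative; the real values come
# from the app config, Consul membership, and the per-session path counts):

import math

def compute_wait_time(siblings_delay, local_paths_for_disk,
                      average_delay, local_session_paths,
                      total_cluster_paths, total_nodes,
                      last_acquire_succeeded=True):
    # delay for sibling paths of the same disk already hosted locally
    wait_time = siblings_delay * local_paths_for_disk
    # how loaded this node is compared with the cluster-wide average
    average_node_paths = float(total_cluster_paths) / float(total_nodes)
    percent = float(local_session_paths) / average_node_paths
    if last_acquire_succeeded:
        wait_time += average_delay * percent
    return math.ceil(wait_time)

# Example: 3 nodes, 12 paths total, this node already holds 4 paths and 1 sibling
# path of the disk in question -> ceil(5*1 + 10*(4/4)) = 15 seconds.
print(compute_wait_time(5, 1, 10, 4, 12, 3))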
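# __fencing above tries the victim node's interfaces in a fixed order (backend 2,
# then management, then backend 1) and powers the node off over the first interface
# that answers a ping. A small sketch of that fallback order with the ping/poweroff
# actions injected as callables; the names and addresses here are illustrative, not
# PetaSAN APIs:

def fence_node(ips_in_order, ping, poweroff):
    """Power off a node via the first reachable ip; returns the ip used or None."""
    for ip in ips_in_order:
        if ping(ip):
            poweroff(ip)
            return ip
    return None

# Example with stubbed actions: only the management ip answers the ping.
used = fence_node(
    ["10.0.2.11", "10.0.1.11", "10.0.3.11"],   # hypothetical backend_2 / management / backend_1 ips
    ping=lambda ip: ip == "10.0.1.11",
    poweroff=lambda ip: None,
)
assert used == "10.0.1.11"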
def prepare_osd(device_name, journal=None):
    config = configuration()
    storage_engine = config.get_cluster_info().storage_engine
    if storage_engine == "filestore":
        return prepare_filestore(device_name, journal)
    return prepare_bluestore(device_name, journal)
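# prepare_osd simply dispatches on the cluster's storage engine, defaulting to
# bluestore for anything other than "filestore". A self-contained sketch of the same
# dispatch using a lookup table instead of an if-chain; the prepare_* callables here
# are stubs for illustration, not the real deploy functions:

def prepare_filestore_stub(device, journal=None):
    return "filestore:{}:{}".format(device, journal)

def prepare_bluestore_stub(device, journal=None):
    return "bluestore:{}:{}".format(device, journal)

ENGINE_DISPATCH = {
    "filestore": prepare_filestore_stub,
    # any other engine value falls through to bluestore, as in prepare_osd above
}

def prepare_osd_stub(device, storage_engine, journal=None):
    handler = ENGINE_DISPATCH.get(storage_engine, prepare_bluestore_stub)
    return handler(device, journal)

assert prepare_osd_stub("/dev/sdb", "filestore") == "filestore:/dev/sdb:None"
assert prepare_osd_stub("/dev/sdb", "bluestore") == "bluestore:/dev/sdb:None"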
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
'''

import sys

from PetaSAN.core.ceph.deploy.build import mon_status_check
from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.common.log import logger

cluster_conf = configuration()
quorum_size = "0"

if cluster_conf.are_all_mgt_nodes_in_cluster_config():
    try:
        quorum_size = len(mon_status_check().get('quorum'))
        sys.stdout.write(str(quorum_size))
        if int(quorum_size) == 3:
            sys.exit(0)  # healthy cluster
    except Exception as ex:
        logger.error("Cluster not running")
        sys.stdout.write("-1")
        sys.exit(-1)
else:
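# The quorum-check script above reports the quorum size on stdout and signals health
# through its exit code (0 when all three monitors are in quorum, non-zero otherwise).
# A sketch of how a caller could consume that contract via subprocess; the script path
# below is a placeholder, not the project's actual install location:

import subprocess

def cluster_has_full_quorum(script_path="/opt/petasan/scripts/quorum_check.py"):
    """Return (healthy, quorum_size_text) based on the script's output and exit code."""
    proc = subprocess.Popen(["python", script_path],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, _ = proc.communicate()
    return proc.returncode == 0, out.strip()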
def clear_disk(args): disk_id = args.disk_id image_name = "image-" + disk_id try: # Get which ceph user is using this function & get his keyring file path # # ---------------------------------------------------------------------- # ceph_auth = CephAuthenticator() config = configuration() cluster_name = config.get_cluster_name() # Get disk metadata : # ------------------- ceph_api = CephAPI() disk_metadata = ceph_api.get_diskmeta(disk_id) # Get pool name : # --------------- pool_name = disk_metadata.pool data_pool = "" # Check if disk has been created on replicated pool or erasure pool : # ------------------------------------------------------------------- if len(disk_metadata.data_pool) > 0: data_pool = disk_metadata.data_pool tmp_image_name = "tmp_disk_" + disk_metadata.id # (1.) Check if a previous tmp image for this disk is still existed : # =================================================================== images_list = ceph_api.get_all_images(pool_name) for image in images_list: if tmp_image_name in image: # Delete image # cmd = "rbd rm {}/{} {} --cluster {}".format( pool_name, image, ceph_auth.get_authentication_string(), cluster_name) if not call_cmd(cmd): print( "Error : clear_disk.py script : cannot remove tmp image ,\ncmd : " + cmd) sys.exit(-1) print( "Stage 1 :\n\tCheck if a previous tmp image for this disk is still existed > (Completed)" ) logger.info( "Stage 1 :\n\tCheck if a previous tmp image for this disk is still existed > (Completed)" ) # (2.) Stop old disk : # ==================== consul_api = ConsulAPI() kv = consul_api.find_disk(disk_id) if kv is not None: manage_disk = ManageDisk() status = manage_disk.stop(disk_id) if status != Status.done: print('Error : Cannot stop disk , id = ' + disk_id) sys.exit(-1) print("Stage 2 :\n\tStop old disk > (Completed)") logger.info("Stage 2 :\n\tStop old disk > (Completed)") time.sleep(3) # (3.) Check if old disk is stopped or not : # ========================================== if len(data_pool) > 0: pool_type = "erasure" _confirm_disk_stopped(data_pool, disk_id, pool_type) else: pool_type = "replicated" _confirm_disk_stopped(pool_name, disk_id, pool_type) print( "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)" ) logger.info( "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)" ) else: print("Stage 2 :\n\tStop old disk > (Completed)") logger.info("Stage 2 :\n\tStop old disk > (Completed)") print( "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)" ) logger.info( "Stage 3 :\n\tConfirm that disk is completely stopped > (Completed)" ) print('\tclear_disk.py script : disk {} is already stopped'.format( disk_id)) # (4.) 
Create a tmp image (not PetaSAN image) : # ============================================= # Generate a random value between 1 and 99999 # random_no = str(random.randint(1, 100000)) tmp_image_name = tmp_image_name + "_" + str(random_no) image_size = disk_metadata.size * 1024 if len(data_pool) > 0: cmd = "rbd create {}/{} --size {} --data-pool {} {} --cluster {}".format( pool_name, tmp_image_name, image_size, data_pool, ceph_auth.get_authentication_string(), cluster_name) else: cmd = "rbd create {}/{} --size {} {} --cluster {}".format( pool_name, tmp_image_name, image_size, ceph_auth.get_authentication_string(), cluster_name) if not call_cmd(cmd): print( "Error : clear_disk.py script : cannot create new tmp image ,\ncmd : " + cmd) sys.exit(-1) print("Stage 4 :\n\tCreate a tmp image called ( " + tmp_image_name + " ) > (Completed)") logger.info("Stage 4 :\n\tCreate a tmp image called ( " + tmp_image_name + " ) > (Completed)") # (5.) Run script to copy "old disk" metadata to new "tmp_disk" : # =============================================================== metadata_script_file = ConfigAPI().get_disk_meta_script_path() # Function : read_disks_metadata : parser_key_1 = "read" arg_1 = "--image" arg_2 = "--pool" # Function : set_disk_metadata : parser_key_2 = "write" arg_3 = "--file" cmd = metadata_script_file + " " + parser_key_1 + " " + arg_1 + " " + image_name + " " + arg_2 + " " + pool_name +\ " | " + metadata_script_file + " " + parser_key_2 + " " + arg_1 + " " + tmp_image_name + " " + arg_2 + " " + pool_name if not call_cmd(cmd): print( "Error : clear_disk.py script : cannot copy metadata from old disk to new tmp image ,\ncmd : " + cmd) sys.exit(-1) print( "Stage 5 :\n\tRun script to copy 'old disk' metadata to new 'tmp_disk' > (Completed)" ) logger.info( "Stage 5 :\n\tRun script to copy 'old disk' metadata to new 'tmp_disk' > (Completed)" ) time.sleep(3) # (6.) Remove metadata of old disk : # =========================================================== old_image_name = str(ceph_api.conf_api.get_image_name_prefix() + disk_metadata.id) confirm = ceph_api.remove_disk_metadata(old_image_name, disk_metadata.pool) if not confirm: print( "Error : clear_disk.py script : cannot remove metadata of old disk" ) # sys.exit(-1) print("Stage 6 :\n\tRemove metadata of old disk > (Completed)") logger.info("Stage 6 :\n\tRemove metadata of old disk > (Completed)") # (7.) Rename old disk image name with "deleted-" + disk_id + random_no: # ====================================================================== new_image_name = "deleted-" + disk_metadata.id + "-" + random_no cmd = "rbd mv {}/{} {} {} --cluster {}".format( pool_name, image_name, new_image_name, ceph_auth.get_authentication_string(), cluster_name) if not call_cmd(cmd): print( "Error : clear_disk.py script : cannot rename old image from {} to {} ,\ncmd : {}" .format(image_name, new_image_name, cmd)) sys.exit(-1) print("Stage 7 :\n\tRename old disk image name with ( " + new_image_name + " ) > (Completed)") logger.info("Stage 7 :\n\tRename old disk image name with ( " + new_image_name + " ) > (Completed)") time.sleep(5) # (8.) 
Rename "tmp_disk" with old disk image name : # ================================================= cmd = "rbd mv {}/{} {} {} --cluster {}".format( pool_name, tmp_image_name, image_name, ceph_auth.get_authentication_string(), cluster_name) if not call_cmd(cmd): print( "Error : clear_disk.py script : cannot rename \"tmp_disk\" from {} to {} ,\ncmd : {}" .format(tmp_image_name, image_name, cmd)) sys.exit(-1) print( "Stage 8 :\n\tRename 'tmp_disk' with old disk image name > (Completed)" ) logger.info( "Stage 8 :\n\tRename 'tmp_disk' with old disk image name > (Completed)" ) time.sleep(5) jm = JobManager() id = jm.add_job(JobType.DELETE_DISK, new_image_name + ' ' + pool_name) print("Stage 9 :\n\tStart a job to remove old disk image , job id = " + str(id)) logger.info( "Stage 9 :\n\tStart a job to remove old disk image , job id = " + str(id)) sys.exit(0) except PoolException as e: print("Error : PoolException , {}".format(e.message)) logger.error("Clear Disk Error : PoolException , {}".format(e.message)) sys.exit(-1) except DiskListException as e: print("Error : DiskListException , {}".format(e.message)) logger.error("Clear Disk Error : DiskListException , {}".format( e.message)) sys.exit(-1) except CephException as e: if e.id == CephException.GENERAL_EXCEPTION: print("Error : CephException , {}".format(e.message)) logger.error("Clear Disk Error : CephException , {}".format(e.message)) sys.exit(-1) except MetadataException as e: print("Error : MetadataException , {}".format(e.message)) logger.error("Clear Disk Error : MetadataException , {}".format( e.message)) sys.exit(-1) except Exception as e: print("Error : Exception , {}".format(e.message)) logger.error("Clear Disk Error : Exception , {}".format(e.message)) sys.exit(-1)
modify it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
'''

from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.common.cmd import *
from PetaSAN.core.entity.cluster import NodeInfo
import json
from PetaSAN.core.config.api import ConfigAPI

config = configuration()
node = config.get_node_info()
cluster = config.get_cluster_info()

conf = configuration()
cluster_info = conf.get_cluster_info()

first_cluster_node = cluster_info.management_nodes[0]
first_node_info = NodeInfo()
first_node_info.load_json(json.dumps(first_cluster_node))

second_cluster_node = cluster_info.management_nodes[1]
second_node_info = NodeInfo()
second_node_info.load_json(json.dumps(second_cluster_node))

call_cmd('python' + ConfigAPI().get_consul_start_up_script_path() +
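# The startup script above turns each entry of management_nodes into a NodeInfo by
# round-tripping it through JSON (json.dumps then load_json), once per node. A small
# helper that keeps that round-trip in one place, shown as a sketch; NodeInfo is
# PetaSAN's own entity class, imported exactly as the script does:

import json
from PetaSAN.core.entity.cluster import NodeInfo

def node_info_from_dict(node_dict):
    info = NodeInfo()
    info.load_json(json.dumps(node_dict))
    return info

# e.g. first_node_info = node_info_from_dict(cluster_info.management_nodes[0])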
def __init__(self):
    self.node_name = configuration().get_node_name()
    self.max_limit = self.MAX_SEND_COUNT
    self.metrics_list = []
def __get_pre_config_disks():
    disks = PreConfigStorageDisks()
    try:
        with open(ConfigAPI().get_node_pre_config_disks(), 'r') as f:
            data = json.load(f)
        disks.load_json(json.dumps(data))
        return disks
    except:
        return disks


# print subprocess.call("ceph-disk prepare --cluster ceph --zap-disk --fs-type xfs /dev/sdj /dev/sdh",shell=True)

cluster_name = configuration().get_cluster_name()
status = StatusReport()
status.success = False

try:
    cm = CacheManager()
    node_name = configuration().get_node_info().name
    storage_engine = configuration().get_cluster_info().storage_engine

    if configuration().get_node_info().is_storage:
        disks = __get_pre_config_disks()
        if len(disks.journals) > 0:
            for d in disks.journals:
                ceph_disk_lib.clean_disk(d)
                add_journal(d)
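# __get_pre_config_disks above falls back to an empty PreConfigStorageDisks object
# whenever the pre-config file is missing or malformed, so the caller can iterate
# disks.journals unconditionally. The same defensive-load pattern, generalized as a
# sketch; the factory/path arguments are placeholders, not PetaSAN APIs, and the
# loaded object is only assumed to expose a load_json() method:

import json

def load_json_or_default(path, factory):
    """Load a JSON file into factory(); return an empty factory() on any failure."""
    obj = factory()
    try:
        with open(path, 'r') as f:
            data = json.load(f)
        obj.load_json(json.dumps(data))
    except Exception:
        pass
    return obj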