def __cleanup(self):
    cmd = ConfigAPI().get_benchmark_script_path() + ' clean -p ' + self.pool
    for node in self.clients:
        try:
            # Run clean as a synchronous command on each client node.
            logger.info('Running benchmark clean command on node: ' + node + ' cmd: ' + cmd)
            ssh().exec_command(node, cmd)
        except Exception as e:
            logger.error('Error running benchmark clean command on node: ' + node + ' : ' + str(e))

def create_osds_remote(remote_mons_ips_ls):
    config_api = ConfigAPI()
    remote_status = StatusReport()
    for remote_mon in remote_mons_ips_ls:
        ssh_obj = ssh()
        status = StatusReport()
        out, err = ssh_obj.exec_command(
            remote_mon,
            " python {} ".format(config_api.get_node_create_osd_script_path()))
        logger.info(" ".join([remote_mon, out]))

        if "/report/" in out:
            # Split on the marker only when present, to avoid an IndexError.
            status.load_json(str(out.split("/report/")[1]))
        elif err:
            status.load_json("Status report error: {}".format(str(err)))
        else:
            status.load_json("Connection error.")

        remote_status.failed_tasks.extend(status.failed_tasks)
        if not status.success:
            logger.error("Cannot create osd for remote node {}".format(remote_mon))
            remote_status.success = False
            return remote_status
    return remote_status

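# Hedged sketch of the output contract assumed above: the remote create-osd
# script is expected to print a serialized StatusReport after a "/report/"
# marker, which is why the caller splits on that marker. For example
# (payload made up):
#
#   out = "...log lines.../report/" + '{"success": true, "failed_tasks": []}'
#   payload = out.split("/report/")[1]   # JSON string passed to load_json()
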
def copy_ceph_config_from_mon():
    cluster_config = configuration()
    cluster_name = cluster_config.get_cluster_name()
    ceph_mon_keyring = ConfigAPI().get_ceph_mon_keyring(cluster_name)
    ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)
    remote_mon_ip = cluster_config.get_remote_ips(
        cluster_config.get_node_info().name)[0]
    status = StatusReport()
    ssh_obj = ssh()
    config_api = ConfigAPI()

    if not os.path.exists(config_api.get_cluster_ceph_dir_path()):
        os.makedirs(config_api.get_cluster_ceph_dir_path(), exist_ok=True)
    if not os.path.exists("/var/lib/ceph/bootstrap-osd/"):
        os.makedirs("/var/lib/ceph/bootstrap-osd/")

    if not ssh_obj.copy_file_from_host(remote_mon_ip,
                                       "{}".format(ceph_client_admin_keyring)):
        logger.error("Cannot copy {} from {}".format(ceph_client_admin_keyring,
                                                     remote_mon_ip))
        status.success = False
    elif not ssh_obj.copy_file_from_host(
            remote_mon_ip, "/etc/ceph/{}.conf".format(cluster_name)):
        logger.error("Cannot copy ceph.conf from {}".format(remote_mon_ip))
        status.success = False
    elif not ssh_obj.copy_file_from_host(
            remote_mon_ip,
            "/var/lib/ceph/bootstrap-osd/{}.keyring".format(cluster_name)):
        logger.error("Cannot copy ceph.keyring from {}".format(remote_mon_ip))
        status.success = False
    return status

def get_node_log(self, node_name):
    ssh_obj = ssh()
    cmd = "python {} {}".format(
        ConfigAPI().get_admin_manage_node_script(), "node-log")
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    return stdout

def __sync_cluster_config_file(self):
    try:
        manage_conf = configuration()
        current_node_name = manage_conf.get_node_info().name
        cluster_info = manage_conf.get_cluster_info()
        config_api = ConfigAPI()

        # Push the local cluster info file to every other management node.
        for i in cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            if node_info.name != current_node_name:
                ssh_obj = ssh()
                if not ssh_obj.copy_file_to_host(
                        node_info.management_ip,
                        config_api.get_cluster_info_file_path()):
                    logger.error("Could not copy configuration file to {} server.".format(node_info.name))
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_sync_config_file")
                    return False
    except Exception as ex:
        logger.exception(str(ex))
        self.__status_report.success = False
        self.__status_report.failed_tasks.append(
            "core_cluster_deploy_couldnt_sync_config_file")
        return False
    return True

def delete_cache(self, node_name, disk_name):
    ssh_obj = ssh()
    cmd = "python {} -disk_name {}".format(
        ConfigAPI().get_admin_delete_cache_job_script(), disk_name)
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    logger.info("Start delete cache job {}".format(stdout))
    return stdout

def get_security_key_():
    # Read the Consul encryption key from another management node in the cluster.
    ssh_exec = ssh()
    conf = configuration()
    cluster_info = conf.get_cluster_info()
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))
        # Skip the local node.
        if remote_node_info.management_ip == conf.get_node_info().management_ip:
            continue
        command_result, err = ssh_exec.exec_command(
            remote_node_info.management_ip,
            'python ' + ConfigAPI().get_consul_encryption_key_script())
        if err is not None and str(err) != "":
            logger.error("Could not read Consul encryption key from node: " +
                         remote_node_info.management_ip)
            logger.error(err)
        else:
            key = str(command_result.splitlines()[0])
            if key != "":
                return key
    return None

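# Hedged usage sketch: the returned key is typically fed to the per-node
# Consul configuration step (see __create_leader_conf_remotely below, which
# passes it via the -key flag):
#
#   key = get_security_key_()
#   if key is not None:
#       __create_leader_conf_remotely(key, cluster_info, local_node_info)
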
def clean_ceph_remote(ips):
    config_api = ConfigAPI()
    for remote_node in ips:
        ssh_obj = ssh()
        ssh_obj.call_command(
            remote_node,
            " python {} ".format(config_api.get_node_clean_script_path()))

def delete_osd(self, node_name, disk_name, osd_id):
    ssh_obj = ssh()
    cmd = "python {} -id {} -disk_name {}".format(
        ConfigAPI().get_admin_delete_osd_job_script(), osd_id, disk_name)
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    logger.info("Start delete osd job {}".format(stdout))
    return stdout

def has_valid_journal(self, node_name):
    ssh_obj = ssh()
    cmd = "python {} {}".format(ConfigAPI().get_admin_manage_node_script(),
                                "valid-journal")
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    # The remote script reports 'None' when there is no valid journal device.
    return 'None' not in stdout

def add_journal(self, node_name, disk_name):
    ssh_obj = ssh()
    cmd = "python {} -disk_name {}".format(
        ConfigAPI().get_admin_add_journal_job_script(), disk_name)
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    logger.info("Start add journal job {}".format(stdout))
    return stdout

def clean_consul_remote():
    conf = configuration()
    ssh_exec = ssh()
    for ip in conf.get_remote_ips(conf.get_node_name()):
        logger.info("Trying to clean Consul on {}".format(ip))
        ssh_exec.call_command(
            ip, 'python ' + ConfigAPI().get_consul_stop_script_path())
        ssh_exec.call_command(
            ip, 'python ' + ConfigAPI().get_consul_clean_script_path())

def __copy_current_tunings(self, ip):
    config_api = ConfigAPI()
    ssh_obj = ssh()
    path = config_api.get_current_tunings_path()
    post_deploy_script_path = path + config_api.get_post_deploy_script_file_name()
    ceph_path = path + config_api.get_ceph_tunings_file_name()
    lio_path = path + config_api.get_lio_tunings_file_name()
    ssh_obj.copy_file_from_host(ip, post_deploy_script_path)
    ssh_obj.copy_file_from_host(ip, ceph_path)
    ssh_obj.copy_file_from_host(ip, lio_path)

def set_ntp_server_remote(self, server):
    cluster_info = configuration().get_cluster_info()
    if len(cluster_info.management_nodes) == 0:
        return None
    node = cluster_info.management_nodes[0]
    ip = node['management_ip']
    _ssh = ssh()
    ret = _ssh.call_command(ip, SET_NTP_SERVER_REMOTE_SCRIPT + ' ' + server)
    return ret

def __fencing(self, node_name):
    maintenance = ManageMaintenance()
    if maintenance.get_maintenance_config().fencing == MaintenanceConfigState.off:
        logger.warning("Fencing action will not fire: the admin disabled it, "
                       "the cluster is in maintenance mode.")
        return

    # Try the node's interfaces in order and power it off via the first one
    # that answers a ping.
    node_list = ConsulAPI().get_node_list()
    for node in node_list:
        if str(node.name) == node_name:
            if Network().ping(node.backend_2_ip):
                logger.info("This node will stop node {}/{}.".format(
                    node_name, node.backend_2_ip))
                ssh().call_command(node.backend_2_ip, " poweroff ", 5)
                break
            elif Network().ping(node.management_ip):
                logger.info("This node will stop node {}/{}.".format(
                    node_name, node.management_ip))
                ssh().call_command(node.management_ip, " poweroff ", 5)
                break
            elif Network().ping(node.backend_1_ip):
                logger.info("This node will stop node {}/{}.".format(
                    node_name, node.backend_1_ip))
                ssh().call_command(node.backend_1_ip, " poweroff ", 5)
                break

def check_remote_connection(self):
    cluster_conf = configuration()
    ips = cluster_conf.get_remote_ips(cluster_conf.get_node_name())
    ssh_obj = ssh()
    status = StatusReport()
    for i in ips:
        if not ssh_obj.check_ssh_connection(str(i)):
            status.success = False
            status.failed_tasks.append(
                "core_cluster_deploy_ip_couldnt_connect" + "%" + i)
            return status
    return status

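# Minimal usage sketch (caller context hypothetical): deployment code can
# bail out early when any remote management node is unreachable over ssh:
#
#   report = self.check_remote_connection()
#   if not report.success:
#       return report
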
def is_journal_space_avail(self, node_name, disk_name):
    ssh_obj = ssh()
    cmd = "python {} {} {}".format(
        ConfigAPI().get_admin_manage_node_script(),
        "disk-avail-space -disk_name", disk_name)
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    # The remote script prints the free space as a number; extract it.
    free_disk_space = float(re.findall(r'-?\d+\.?\d*', stdout)[0])
    bluestore_block_db_size = get_journal_size()
    return free_disk_space > bluestore_block_db_size

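# Hedged illustration of the parsing above (sample output made up): the
# remote command prints the available space as a number, and the regex
# extracts the first numeric token:
#
#   re.findall(r'-?\d+\.?\d*', "avail: 20.5")   # -> ['20.5']
#
# float('20.5') is then compared against the configured bluestore block.db
# size returned by get_journal_size().
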
def has_valid_cache(self, node_name):
    ssh_obj = ssh()
    cmd = "python {} {}".format(ConfigAPI().get_admin_manage_node_script(),
                                "valid-cache")
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    # The remote script reports 'None' when there is no valid cache device.
    return 'None' not in stdout

def is_cache_partition_avail(self, node_name, disk_name):
    ssh_obj = ssh()
    cmd = "python {} {} {}".format(
        ConfigAPI().get_admin_manage_node_script(),
        "cache-partition-avail -disk_name", disk_name)
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    return 'True' in stdout

def get_disks_health(self, node_name):
    ssh_obj = ssh()
    disks_health = {}
    cmd = "python {} {}".format(ConfigAPI().get_admin_manage_node_script(),
                                "disk-health")
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    # Ignore stderr output that is only a warning.
    if stderr and "warning" not in str(stderr).lower():
        logger.error(stderr)
        return disks_health
    disks_health = json.loads(str(stdout))
    return disks_health

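# Hedged illustration (result shape assumed, keys made up): the remote
# disk-health script prints JSON mapping disk names to a health state,
# e.g. {"sdb": "OK", "sdc": "Failing"}, so a caller might do:
#
#   health = self.get_disks_health('ps-node-01')
#   for disk, state in health.items():
#       logger.info("{}: {}".format(disk, state))
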
def collect_remote_nodes_state(self):
    script_path = ConfigAPI().get_collect_state_script()
    state_dir = ConfigAPI().get_collect_state_dir()
    if not os.path.exists(state_dir):
        os.makedirs(state_dir)
    cmd = "python {}".format(script_path)
    ssh_obj = ssh()
    remote_nodes = configuration().get_remote_nodes_config(
        configuration().get_node_name())
    for node in remote_nodes:
        if ssh_obj.call_command(node.management_ip, cmd):
            logger.info("Executed collect script on {}".format(node.name))
            compress_file = state_dir + node.name + ".tar"
            if not ssh_obj.copy_file_from_host(node.management_ip,
                                               compress_file):
                logger.error("Error copying files from remote node {}".format(
                    node.name))
                return False
    return True

def __create_leader_conf_remotely(key_gen, cluster_info, local_node_info):
    ssh_exec = ssh()
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))
        if local_node_info.backend_1_ip != remote_node_info.backend_1_ip:
            command_result = ssh_exec.call_command(
                remote_node_info.backend_1_ip,
                'python ' + ConfigAPI().get_consul_create_conf_script() +
                ' -key="' + key_gen + '"')
            if command_result is False:
                logger.error("Could not create Consul configuration on node: " +
                             remote_node_info.backend_1_ip)
                return command_result
    return True

def kill_petasan_console(self, remote=True):
    cluster_conf = configuration()
    ssh_obj = ssh()
    # Kill the console on the local node first.
    exec_command("python {} ".format(
        ConfigAPI().get_kill_console_script_path()))
    if not remote:
        return
    try:
        for ip in cluster_conf.get_remote_ips(cluster_conf.get_node_name()):
            ssh_obj.exec_command(ip, "python {} ".format(
                ConfigAPI().get_kill_console_script_path()))
    except Exception as ex:
        logger.exception(str(ex))
        raise

def get_ntp_server_remote(self):
    cluster_info = configuration().get_cluster_info()
    if len(cluster_info.management_nodes) == 0:
        return None
    node = cluster_info.management_nodes[0]
    ip = node['management_ip']
    _ssh = ssh()
    remote_ret = _ssh.get_remote_object(ip, GET_NTP_SERVER_REMOTE_SCRIPT)
    if remote_ret is None:
        return None
    if remote_ret['success'] is False:
        return None
    return remote_ret['ntp_server']

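# Note on the contract assumed above: get_remote_object is expected to run
# the script on the remote node and deserialize its output into a dict of
# the form {'success': bool, 'ntp_server': str}; both keys are accessed by
# the checks in get_ntp_server_remote().
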
def stop_petasan_services(self, remote=True):
    logger.info("Stopping all PetaSAN services.")
    cluster_conf = configuration()
    ssh_obj = ssh()
    exec_command("python {} ".format(
        ConfigAPI().get_stop_petasan_services_path()))
    if not remote:
        return
    try:
        for ip in cluster_conf.get_remote_ips(cluster_conf.get_node_name()):
            ssh_obj.exec_command(ip, "python {}".format(
                ConfigAPI().get_stop_petasan_services_path()))
    except Exception as ex:
        logger.exception(str(ex))
        raise

def start_petasan_services(self, remote=True):
    cluster_conf = configuration()
    ssh_obj = ssh()
    exec_command("python {} build ".format(
        ConfigAPI().get_startup_petasan_services_path()))
    sleep(5)
    if not remote:
        return
    try:
        for ip in cluster_conf.get_remote_ips(cluster_conf.get_node_name()):
            ssh_obj.exec_command(ip, "python {} build ".format(
                ConfigAPI().get_startup_petasan_services_path()))
            sleep(5)
    except Exception as ex:
        logger.exception(str(ex))
        raise

def update_node_role(self, node_name, is_storage=-1, is_iscsi=-1, is_backup=-1):
    ssh_obj = ssh()
    # Example invocation: 'update-role -is_iscsi 1 -is_storage 1'
    cmd = "python {} update-role -is_iscsi {} -is_storage {} -is_backup {}".format(
        ConfigAPI().get_admin_manage_node_script(), is_iscsi, is_storage,
        is_backup)
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    if stderr:
        return None
    if str(stdout) == "1" or stdout is None:
        return
    elif str(stdout) == "-1":
        raise Exception('Error updating node roles.')

def join(self, ip, password):
    config = configuration()
    ssh_obj = ssh()
    config_api = ConfigAPI()
    if os.path.exists(config_api.get_cluster_info_file_path()):
        os.remove(config_api.get_cluster_info_file_path())
    Network().clean_bonding()
    logger.info("Starting node join")

    if ssh_obj.copy_public_key_from_host(ip, password):
        logger.info("Successfully copied public keys.")
        if ssh_obj.copy_private_key_from_host(ip, password):
            ssh_obj.create_authorized_key_file()
            logger.info("Successfully copied private keys.")
            config.set_password(password)
            logger.info("Password set successfully.")
        else:
            raise SSHKeyException("Error while copying keys or setting password.")

    if not ssh_obj.call_command(ip, "python {}".format(
            config_api.get_cluster_status_for_join_path())):
        raise JoinException("Ceph monitor status not healthy.")

    # Ensure the cluster info directory exists before copying into it.
    cluster_info_dir = os.path.dirname(config_api.get_cluster_info_file_path())
    if not os.path.exists(cluster_info_dir):
        os.makedirs(cluster_info_dir)

    logger.info("Start copying cluster info file.")
    if not ssh_obj.copy_file_from_host(ip,
                                       config_api.get_cluster_info_file_path()):
        raise Exception("Error while copying cluster info file.")
    logger.info("Successfully copied cluster info file.")

    cluster_name = config.get_cluster_name(True)
    logger.info("Joined cluster {}".format(cluster_name))
    self.__copy_current_tunings(ip)
    return cluster_name

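# Minimal usage sketch (owning class and values hypothetical): joining an
# existing cluster from a fresh node, given the management IP of a deployed
# node and the cluster root password:
#
#   deploy = ManageDeploy()                  # hypothetical class name
#   name = deploy.join('192.168.1.10', 'secret')
#   logger.info('Joined cluster ' + name)
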
def create_cluster_info(self, password, cluster_name):
    config = configuration()
    ssh_obj = ssh()
    try:
        ssh_obj.create_id(True)
        ssh_obj.create_authorized_key_file()
        logger.info("Created keys for cluster {}".format(cluster_name))
        config.set_cluster_name(cluster_name)
        logger.info("Created cluster file and set cluster name to {}".format(
            cluster_name))
        Network().clean_bonding()
        if not config.set_password(password):
            logger.error("Could not set root password.")
            return Status().error
        logger.info("Password set successfully.")
    except Exception as ex:
        logger.exception(str(ex))
        return Status().error
    return Status().done

def get_disk_list(self, node_name, pid):
    """Run a remote ssh command to list all disks on another node.

    Args:
        node_name (str): name of the node to query.
        pid (int): process id passed to the disk-list command.

    Returns:
        A list of DiskInfo objects on success, or None on error.
    """
    ssh_obj = ssh()
    cmd = "python {} {} {}".format(
        ConfigAPI().get_admin_manage_node_script(), "disk-list -pid", pid)
    stdout, stderr = ssh_obj.exec_command(node_name, cmd)
    # Ignore stderr output that is only a warning.
    if stderr and "warning" not in str(stderr).lower():
        logger.error(stderr)
        return None
    data = json.loads(str(stdout))
    disk_list = []
    for i in data:
        disk_info = DiskInfo()
        disk_info.load_json(json.dumps(i))
        disk_list.append(disk_info)
    return disk_list