def set_cluster_name(self, cluster_name):
    """Create a fresh ClusterInfo carrying only *cluster_name* and persist it.

    Overwrites the cluster info file, creating its directory on first use.
    """
    info = ClusterInfo()
    info.name = cluster_name
    path = ConfigAPI().get_cluster_info_file_path()
    parent = os.path.dirname(path)
    # First deployment: the config directory may not exist yet.
    if not os.path.exists(parent):
        os.makedirs(parent)
    with open(path, 'w') as f:
        f.write(info.write_json())
def get_cluster_info(self):
    """Read the cluster info file and return it as a ClusterInfo object."""
    info_path = ConfigAPI().get_cluster_info_file_path()
    with open(info_path, 'r') as f:
        raw = json.load(f)
    info = ClusterInfo()
    # Round-trip through json so load_json receives a normalized JSON string.
    info.load_json(json.dumps(raw))
    return info
def __sync_cluster_config_file(self):
    """Push the local cluster info file to every other management node.

    Iterates the management nodes recorded in the cluster configuration and
    scp's the cluster info file to each node other than the current one.

    Returns:
        bool: True when every copy succeeded, False otherwise (the status
        report is marked failed with ``core_cluster_deploy_couldnt_sync_config_file``).
    """
    try:
        manage_conf = configuration()
        current_node_name = manage_conf.get_node_info().name
        cluster_info = manage_conf.get_cluster_info()
        config_api = ConfigAPI()
        for i in cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            # Skip the local node; only remote peers need the file pushed.
            if node_info.name != current_node_name:
                ssh_obj = ssh()
                if not ssh_obj.copy_file_to_host(
                        node_info.management_ip,
                        config_api.get_cluster_info_file_path()):
                    logger.error(
                        "Could not copy configuration file to {} server.".
                        format(node_info.name))
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_sync_config_file")
                    return False
    except Exception as ex:
        # Fix: ex.message does not exist on Python 3 exceptions and would
        # raise AttributeError inside the handler; str(ex) works on 2 and 3.
        logger.exception(str(ex))
        self.__status_report.success = False
        self.__status_report.failed_tasks.append(
            "core_cluster_deploy_couldnt_sync_config_file")
        return False
    return True
def set_cluster_network_info(self, cluster_info):
    """Stamp *cluster_info* with the current cluster name and write it to disk.

    :type cluster_info: ClusterInfo
    """
    cluster_info.name = self.get_cluster_name(True)
    target = ConfigAPI().get_cluster_info_file_path()
    with open(target, 'w') as f:
        f.write(cluster_info.write_json())
def join(self, ip, password):
    """Join this node to the running cluster reachable at *ip*.

    Exchanges SSH keys with the remote node, verifies the remote ceph
    monitor status, then pulls the cluster info file locally.

    Args:
        ip: management IP of a node already in the cluster.
        password: root password used to bootstrap the SSH key exchange.

    Returns:
        str: the joined cluster's name.

    Raises:
        SSHKeyException: when key exchange fails.
        JoinException: when the remote monitor status is not healthy.
        Exception: when copying the cluster info file fails.
    """
    config = configuration()
    ssh_obj = ssh()
    config_api = ConfigAPI()
    # Start from a clean slate: drop any stale cluster info and bonding.
    if os.path.exists(config_api.get_cluster_info_file_path()):
        os.remove(config_api.get_cluster_info_file_path())
    Network().clean_bonding()
    logger.info("Starting node join")
    if ssh_obj.copy_public_key_from_host(ip, password):
        logger.info("Successfully copied public keys.")
        if ssh_obj.copy_private_key_from_host(ip, password):
            ssh_obj.create_authorized_key_file()
            logger.info("Successfully copied private keys.")
            config.set_password(password)
            logger.info("password set successfully.")
    else:
        raise SSHKeyException(
            "Error while copying keys or setting password.")
    if not ssh_obj.call_command(
            ip, "python {}".format(
                config_api.get_cluster_status_for_join_path())):
        raise JoinException("ceph monitor status not healthy.")
    # Fix: the original guarded makedirs with os.listdir(), which raises
    # OSError when the directory is missing and makes makedirs raise when
    # the directory exists but is empty. os.path.exists is the right test.
    info_dir = os.path.dirname(config_api.get_cluster_info_file_path())
    if not os.path.exists(info_dir):
        os.makedirs(info_dir)
    logger.info("Start copying cluster info file.")
    if not ssh_obj.copy_file_from_host(
            ip, config_api.get_cluster_info_file_path()):
        raise Exception("Error while copy cluster info file.")
    logger.info("Successfully copied cluster info file.")
    cluster_name = config.get_cluster_name(True)
    logger.info("Joined cluster {}".format(cluster_name))
    self.__copy_current_tunings(ip)
    return cluster_name
#!/usr/bin/python
'''
 Copyright (C) 2019 Maged Mokhtar <mmokhtar <at> petasan.org>
 Copyright (C) 2019 PetaSAN www.petasan.org

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU Affero General Public License
 as published by the Free Software Foundation

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.
'''
from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.config.api import ConfigAPI

# Rewrite the stored cluster info with the default ceph cluster name.
info = configuration().get_cluster_info()
info.name = "ceph"
with open(ConfigAPI().get_cluster_info_file_path(), 'w') as f:
    f.write(info.write_json())
def replace(self, ip, password):
    """Prepare this node to replace a dead management node of the cluster at *ip*.

    Exchanges SSH keys, checks the remote cluster state, pulls the cluster
    info file, and verifies this node's name and management IP match one of
    the recorded management nodes before marking the replace in progress.

    Args:
        ip: management IP of a healthy node in the cluster.
        password: root password used to bootstrap the SSH key exchange.

    Returns:
        str: the cluster's name.

    Raises:
        SSHKeyException: when key exchange fails.
        ReplaceException: when the remote cluster state forbids a replace or
            this node's name/IP do not match any recorded management node.
        Exception: when copying the cluster info file fails.
    """
    config = configuration()
    ssh_obj = ssh()
    config_api = ConfigAPI()
    logger.info("Starting replace.")
    if os.path.exists(config_api.get_cluster_info_file_path()):
        os.remove(config_api.get_cluster_info_file_path())
    if ssh_obj.copy_public_key_from_host(ip, password):
        logger.info("Successfully copied public keys.")
        if ssh_obj.copy_private_key_from_host(ip, password):
            ssh_obj.create_authorized_key_file()
            logger.info("Successfully copied private keys.")
    else:
        raise SSHKeyException("Error copying keys")
    # The remote status script prints an integer code describing cluster state.
    out, err = ssh_obj.exec_command(
        ip, "python {}".format(config_api.get_cluster_status_for_join_path()))
    out = int(out)
    if out == -1:
        raise ReplaceException("core_deploy_replace_mon_not_healthy_err")
    elif out == 0:
        raise ReplaceException(
            "core_deploy_replace_cluster_in_progress_err")
    elif out == 1:
        raise ReplaceException(
            "core_deploy_replace_two_management_node_down_err")
    elif out == 3:
        raise ReplaceException("core_deploy_replace_cluster_running_err")
    # Fix: the original guarded makedirs with os.listdir(), which raises
    # OSError when the directory is missing and makes makedirs raise when
    # the directory exists but is empty. os.path.exists is the right test.
    info_dir = os.path.dirname(config_api.get_cluster_info_file_path())
    if not os.path.exists(info_dir):
        os.makedirs(info_dir)
    logger.info("Starting to copy config file")
    if not ssh_obj.copy_file_from_host(
            ip, config_api.get_cluster_info_file_path()):
        raise Exception("Error copying config file")
    logger.info("Successfully copied config file.")
    cluster_name = config.get_cluster_name(True)
    logger.info("Successfully joined to cluster {}".format(cluster_name))
    # This node must match a recorded management node by both name and IP.
    wrong_name = True
    wrong_ip = True
    for node_info in config.get_management_nodes_config():
        if node_info.name == config.get_node_name(
        ) or node_info.management_ip == Network().get_node_management_ip():
            if node_info.name == config.get_node_name():
                wrong_name = False
            if node_info.management_ip == Network().get_node_management_ip():
                wrong_ip = False
            if not wrong_name and not wrong_ip:
                config.set_node_info(node_info, True)
                # Marker file signals build() to run the replace path.
                open(config_api.get_replace_file_path(), 'w+').close()
                break
    if wrong_name and wrong_ip:
        os.remove(config_api.get_cluster_info_file_path())
        raise ReplaceException("core_deploy_replace_node_do_not_match_err")
    elif wrong_name:
        os.remove(config_api.get_cluster_info_file_path())
        raise ReplaceException(
            "core_deploy_replace_node_do_not_match_name_err")
    elif wrong_ip:
        os.remove(config_api.get_cluster_info_file_path())
        raise ReplaceException(
            "core_deploy_replace_node_do_not_match_ip_err")
    config.set_password(password)
    logger.info("password set successfully.")
    self.__copy_current_tunings(ip)
    return cluster_name
def build(self):
    """Drive cluster deployment, dispatching on how many management nodes exist.

    Branches:
      0 nodes  -> register node 1 locally.
      1 node   -> register node 2 and sync the config file to peers.
      2 nodes  -> full build (consul, monitors, OSDs, commit, services).
      3 nodes  -> join a new node, or replace a dead management node when the
                  replace marker file exists.

    Returns a BuildStatus value; on any unexpected exception the cluster info
    file is removed and BuildStatus().error is returned.
    """
    try:
        self.__status_report = StatusReport()
        conf = configuration()
        if len(conf.get_cluster_info().management_nodes) == 0:
            # --- Node 1: just record this node in the cluster config. ---
            node_num = len(conf.get_cluster_info().management_nodes) + 1
            self.__status_report.nod_num = node_num
            NTPConf().setup_ntp_local()
            if conf.add_management_node() != Status().done:
                self.__status_report.success = False
                self.__status_report.failed_tasks.append(
                    "core_cluster_deploy_cant_add_node")
            # NOTE(review): unlike the node-2 branch there is no early return
            # on failure here, so the success message below is logged even
            # when add_management_node failed — confirm this is intended.
            logger.info(
                "Node 1 added, cluster requires 2 other nodes to build.")
            self.run_post_deploy_script()
            return BuildStatus().OneManagementNode
        elif len(conf.get_cluster_info().management_nodes) == 1:
            # --- Node 2: verify connectivity, then record this node. ---
            node_num = len(conf.get_cluster_info().management_nodes) + 1
            self.__status_report.nod_num = node_num
            connection_status = self.check_connections()
            if not connection_status.success:
                self.__status_report.failed_tasks.extend(
                    connection_status.failed_tasks)
                logger.error("Connection ping error.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().connection_error
            NTPConf().setup_ntp_local()
            if conf.add_management_node() != Status().done:
                self.__status_report.success = False
                self.__status_report.failed_tasks.append(
                    "core_cluster_deploy_cant_add_node")
                return BuildStatus().error
            if not self.__sync_cluster_config_file():
                return BuildStatus().error
            logger.info(
                "Node 2 is added, cluster requires 1 other node to build.")
            self.run_post_deploy_script()
            return BuildStatus().TwoManagementNodes
        elif len(conf.get_cluster_info().management_nodes) == 2:
            # --- Node 3: full cluster build across all three nodes. ---
            node_num = len(conf.get_cluster_info().management_nodes) + 1
            self.__status_report.nod_num = node_num
            connection_status = self.check_connections()
            if not connection_status.success:
                self.__status_report.failed_tasks.extend(
                    connection_status.failed_tasks)
                logger.error("Connection ping error.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().connection_error
            status = self.check_remote_connection()
            if not status.success:
                self.__status_report = status
                return BuildStatus().error
            NTPConf().setup_ntp_local()
            logger.info("Stopping petasan services on all nodes.")
            self.stop_petasan_services()
            logger.info("Starting local clean_ceph.")
            clean_ceph()
            logger.info("Starting local clean_consul.")
            clean_consul()
            status = build_consul()
            if not status.success:
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
                logger.error("Could not build consul.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_consul_error
            status = build_monitors()
            if not status.success:
                self.__status_report = status
                logger.error("Could not build ceph monitors.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_monitors_error
            status = build_osds()
            if not status.success:
                self.__status_report = status
                logger.error("Could not build ceph OSDs.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_osd_error
            else:
                # OSD build succeeded; keep any non-fatal task notes.
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
            logger.info("Main core components deployed.")
            if not self.__commit_management_nodes():
                self.__status_report.success = False
                logger.error("Could not commit node.")
                self.__status_report.failed_tasks.append(
                    "core_cluster_deploy_couldnt_commit_node")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().error
            logger.info("Starting all services.")
            self.start_petasan_services()
            if not self.add__node_to_hosts_file():
                self.__status_report.success = False
                logger.error("Could not add node to hosts file.")
                self.__status_report.failed_tasks.append(
                    "core_cluster_deploy_couldnt_add_node_hosts")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().error
            SharedFS().setup_management_nodes()
            if conf.add_management_node() != Status().done:
                self.__status_report.success = False
                self.__status_report.failed_tasks.append(
                    "core_cluster_deploy_couldnt_add_node_config")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().error
            logger.info("Updating rbd pool.")
            if not create_rbd_pool():
                self.__status_report.success = False
                self.__status_report.failed_tasks.append(
                    "core_cluster_deploy_couldnt_update_rbd")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().error
            logger.info("Creating EC Profiles.")
            if not create_ec_profiles():
                self.__status_report.success = False
                self.__status_report.failed_tasks.append(
                    "core_cluster_deploy_couldnt_create_ec_profiles")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().error
            logger.info(
                "Waiting for ceph to reach active and clean status.")
            test_active_clean()
            if not self.__sync_cluster_config_file():
                return BuildStatus().error
            self.run_post_deploy_script()
            self.kill_petasan_console(True)
            logger.info("Node 3 added and cluster is now ready.")
            # Falls through to the final `return BuildStatus().done`.
        elif len(
                conf.get_cluster_info().management_nodes
        ) == 3 and not os.path.exists(ConfigAPI().get_replace_file_path()):
            # ------------------------------ Join ------------------------------ #
            # Cluster already complete; add this node as an extra member.
            # ------------------------------------------------------------------ #
            node_num = len(conf.get_cluster_info().management_nodes) + 1
            self.__status_report.nod_num = node_num
            logger.info("Joining node to running cluster.")
            connection_status = self.check_connections()
            if not connection_status.success:
                self.__status_report.failed_tasks.extend(
                    connection_status.failed_tasks)
                logger.error("Connection ping error.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().connection_error
            status = self.check_remote_connection()
            NTPConf().setup_ntp_local()
            if not status.success:
                self.__status_report = status
                return BuildStatus().error
            logger.info("Stopping petasan services on local node.")
            self.stop_petasan_services(remote=False)
            logger.info("Starting local clean_ceph.")
            clean_ceph_local()
            logger.info("Starting local clean_consul.")
            clean_consul_local()
            status = build_consul_client()
            if not status.success:
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
                logger.error("Could not build consul client.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_consul_error
            status = copy_ceph_config_from_mon()
            if not status.success:
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
                logger.error("Could not copy ceph config.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_consul_error
            status = create_osds_local()
            if not status.success:
                self.__status_report = status
                logger.error("Could not build ceph OSDs.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_osd_error
            else:
                # OSD build succeeded; keep any non-fatal task notes.
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
            logger.info("Main core components deployed.")
            logger.info("Staring all services")
            self.start_petasan_services(remote=False)
            test_active_clean()
            # Commit is retried once after waiting for ceph to settle.
            if not self.__commit_local_node():
                test_active_clean()
                if not self.__commit_local_node():
                    self.__status_report.success = False
                    logger.error("Could not commit node.")
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_commit_node_join")
                    logger.error(self.__status_report.failed_tasks)
                    os.remove(ConfigAPI().get_cluster_info_file_path())
                    return BuildStatus().error
            # Hosts-file update is likewise retried once.
            if not self.add__node_to_hosts_file(remote=False):
                test_active_clean()
                if not self.add__node_to_hosts_file(remote=False):
                    self.__status_report.success = False
                    logger.error("Could not add node to hosts file.")
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_add_node_hosts")
                    logger.error(self.__status_report.failed_tasks)
                    os.remove(ConfigAPI().get_cluster_info_file_path())
                    return BuildStatus().error
            logger.info("Node successfully joined to cluster.")
            self.kill_petasan_console(False)
            if os.path.exists(ConfigAPI().get_replace_file_path()):
                os.remove(ConfigAPI().get_replace_file_path())
            self.run_post_deploy_script()
            return BuildStatus().done_joined
        elif len(conf.get_cluster_info().management_nodes
                 ) == 3 and os.path.exists(
                     ConfigAPI().get_replace_file_path()):
            # ----------------------------- Replace ---------------------------- #
            # Replace marker present: rebuild this node in place of a dead one.
            # ------------------------------------------------------------------ #
            node_num = len(conf.get_cluster_info().management_nodes) + 1
            self.__status_report.nod_num = node_num
            logger.info("Replace node is starting.")
            connection_status = self.check_connections()
            if not connection_status.success:
                self.__status_report.failed_tasks.extend(
                    connection_status.failed_tasks)
                logger.error("Connection ping error.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().connection_error
            status = self.check_remote_connection()
            NTPConf().setup_ntp_local()
            if not status.success:
                self.__status_report = status
                return BuildStatus().error
            logger.info("Stopping petasan services on local node.")
            self.stop_petasan_services(remote=False)
            logger.info("Starting clean_ceph.")
            clean_ceph_local()
            logger.info("Starting local clean_consul.")
            clean_consul_local()
            status = replace_consul_leader()
            if not status.success:
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
                logger.error("Could not replace consul leader.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_consul_error
            status = replace_local_monitor()
            if not status.success:
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_monitors_error
            status = create_osds_local()
            if not status.success:
                self.__status_report = status
                logger.error("Could not build ceph OSDs.")
                logger.error(self.__status_report.failed_tasks)
                return BuildStatus().build_osd_error
            else:
                # OSD build succeeded; keep any non-fatal task notes.
                self.__status_report.failed_tasks.extend(
                    status.failed_tasks)
            logger.info("Main core components deployed.")
            logger.info("Starting all services.")
            self.start_petasan_services(remote=False)
            test_active_clean()
            SharedFS().rebuild_management_node()
            logger.info("Node successfully added to cluster.")
            self.run_post_deploy_script()
            self.kill_petasan_console(False)
            os.remove(ConfigAPI().get_replace_file_path())
            return BuildStatus().done_replace
    except Exception as ex:
        # On any unexpected failure, drop the cluster info file so the
        # deployment can be restarted cleanly.
        config_api = ConfigAPI()
        if os.path.exists(config_api.get_cluster_info_file_path()):
            os.remove(config_api.get_cluster_info_file_path())
        # NOTE(review): ex.message is Python-2-only; on Python 3 this line
        # itself raises AttributeError — confirm target interpreter.
        logger.exception(ex.message)
        return BuildStatus().error
    return BuildStatus().done