Exemple #1
0
    def set_cluster_name(self, cluster_name):
        """
        Write a fresh ClusterInfo carrying only ``cluster_name`` to the
        cluster info file, creating the file's parent directory if needed.

        :param cluster_name: value stored in the ``name`` attribute of the
            new ClusterInfo.
        """
        cluster = ClusterInfo()
        info_path = ConfigAPI().get_cluster_info_file_path()
        cluster.name = cluster_name

        # Make sure the directory that will hold the file exists.
        info_dir = os.path.dirname(info_path)
        if not os.path.exists(info_dir):
            os.makedirs(info_dir)

        with open(info_path, 'w') as info_file:
            serialized = cluster.write_json()
            info_file.write(serialized)
Exemple #2
0
 def get_cluster_info(self):
     """
     Read the cluster info file and return its content as a ClusterInfo.

     The file is parsed as JSON, re-serialized, and handed to
     ``ClusterInfo.load_json``.

     :return: the populated ClusterInfo instance.
     """
     info_path = ConfigAPI().get_cluster_info_file_path()
     with open(info_path, 'r') as info_file:
         parsed = json.load(info_file)

     cluster = ClusterInfo()
     cluster.load_json(json.dumps(parsed))
     return cluster
Exemple #3
0
    def __sync_cluster_config_file(self):
        """
        Copy the local cluster info file to every other management node.

        Each entry of the cluster info's ``management_nodes`` list is loaded
        into a NodeInfo; the entry whose name equals the current node's name
        is skipped. On the first failed copy, or on any exception, the
        status report is marked unsuccessful and the task
        "core_cluster_deploy_couldnt_sync_config_file" is recorded.

        :return: True if every copy succeeded, False otherwise.
        """
        try:
            manage_conf = configuration()
            current_node_name = manage_conf.get_node_info().name
            cluster_info = manage_conf.get_cluster_info()
            cluster_info_path = ConfigAPI().get_cluster_info_file_path()

            for node_data in cluster_info.management_nodes:
                node_info = NodeInfo()
                node_info.load_json(json.dumps(node_data))

                # Nothing to copy to ourselves.
                if node_info.name == current_node_name:
                    continue

                ssh_obj = ssh()
                if not ssh_obj.copy_file_to_host(node_info.management_ip,
                                                 cluster_info_path):
                    logger.error(
                        "Could not copy configuration file to {} server.".
                        format(node_info.name))
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_sync_config_file")
                    return False

        except Exception as ex:
            # Log the exception object itself: `ex.message` does not exist on
            # Python 3 (and is empty for multi-argument exceptions on
            # Python 2), which would break or blank this handler.
            logger.exception(ex)
            self.__status_report.success = False
            self.__status_report.failed_tasks.append(
                "core_cluster_deploy_couldnt_sync_config_file")
            return False

        return True
Exemple #4
0
    def set_cluster_network_info(self, cluster_info):
        """
        Stamp ``cluster_info`` with the current cluster name and write it,
        serialized with ``write_json``, over the cluster info file.

        :type cluster_info: ClusterInfo
        """
        cluster_info.name = self.get_cluster_name(True)
        info_path = ConfigAPI().get_cluster_info_file_path()

        with open(info_path, 'w') as info_file:
            serialized = cluster_info.write_json()
            info_file.write(serialized)
Exemple #5
0
    def join(self, ip, password):
        """
        Prepare this node to join the cluster that the host ``ip`` belongs to.

        Steps, in order: remove any local cluster info file, clean bonding,
        copy the public and private SSH keys from ``ip``, store the password,
        run the cluster status script on ``ip``, copy the cluster info file
        from ``ip`` and finally copy the current tunings.

        :param ip: host the keys and the cluster info file are copied from.
        :param password: password used for the key copy; also stored through
            ``config.set_password``.
        :return: the cluster name read after the cluster info file is copied.
        :raises SSHKeyException: if either the public or the private key
            could not be copied.
        :raises JoinException: if the remote status command fails.
        :raises Exception: if the cluster info file could not be copied.
        """
        config = configuration()
        ssh_obj = ssh()
        config_api = ConfigAPI()
        cluster_info_path = config_api.get_cluster_info_file_path()

        if os.path.exists(cluster_info_path):
            os.remove(cluster_info_path)
        Network().clean_bonding()
        logger.info("Starting node join")

        # Both keys are required. Previously a failed private key copy was
        # silently ignored because the `else` only guarded the public key.
        if not ssh_obj.copy_public_key_from_host(ip, password):
            raise SSHKeyException(
                "Error while copying keys or setting password.")
        logger.info("Successfully copied public keys.")

        if not ssh_obj.copy_private_key_from_host(ip, password):
            raise SSHKeyException(
                "Error while copying keys or setting password.")
        ssh_obj.create_authorized_key_file()
        logger.info("Successfully copied private keys.")
        config.set_password(password)
        logger.info("password set successfully.")

        if not ssh_obj.call_command(
                ip, "python {}".format(
                    config_api.get_cluster_status_for_join_path())):
            raise JoinException("ceph monitor status not healthy.")

        # Create the target directory only when it is missing. The former
        # `os.listdir` test raised on a missing directory and made
        # `os.makedirs` fail on an existing empty one.
        cluster_info_dir = os.path.dirname(cluster_info_path)
        if not os.path.exists(cluster_info_dir):
            os.makedirs(cluster_info_dir)
        logger.info("Start copying  cluster info file.")

        if not ssh_obj.copy_file_from_host(ip, cluster_info_path):
            raise Exception("Error while copy cluster info file.")
        logger.info("Successfully copied cluster info file.")

        cluster_name = config.get_cluster_name(True)
        logger.info("Joined cluster {}".format(cluster_name))
        self.__copy_current_tunings(ip)
        return cluster_name
Exemple #6
0
#!/usr/bin/python
'''
 Copyright (C) 2019 Maged Mokhtar <mmokhtar <at> petasan.org>
 Copyright (C) 2019 PetaSAN www.petasan.org


 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU Affero General Public License
 as published by the Free Software Foundation

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.
'''

from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.config.api import ConfigAPI

# Load the current cluster info and force its name to "ceph".
cluster_info = configuration().get_cluster_info()
cluster_info.name = "ceph"

# Write the renamed cluster info back over the cluster info file.
config = ConfigAPI()
with open(config.get_cluster_info_file_path(), 'w') as f:
    f.write(cluster_info.write_json())
Exemple #7
0
    def replace(self, ip, password):
        """
        Prepare this node to replace a management node of the cluster that
        the host ``ip`` belongs to.

        Steps, in order: remove any local cluster info file, copy the SSH
        keys from ``ip``, run the cluster status script on ``ip`` and reject
        the status codes -1, 0, 1 and 3, copy the cluster info file from
        ``ip``, then look for a management node entry matching this node's
        name and management IP. When both match, the node info is stored and
        the replace marker file is created; otherwise the copied cluster info
        file is removed and an error is raised. Finally the password is
        stored and the current tunings are copied.

        :param ip: host the keys and the cluster info file are copied from.
        :param password: password used for the key copy; also stored through
            ``config.set_password``.
        :return: the cluster name read after the cluster info file is copied.
        :raises SSHKeyException: if either the public or the private key
            could not be copied.
        :raises ReplaceException: on a rejected cluster status code, or when
            no management node entry matches this node's name and IP.
        :raises Exception: if the cluster info file could not be copied.
        """
        config = configuration()
        ssh_obj = ssh()
        config_api = ConfigAPI()
        cluster_info_path = config_api.get_cluster_info_file_path()

        logger.info("Starting replace.")
        if os.path.exists(cluster_info_path):
            os.remove(cluster_info_path)

        # Both keys are required. Previously a failed private key copy was
        # silently ignored because the `else` only guarded the public key.
        if not ssh_obj.copy_public_key_from_host(ip, password):
            raise SSHKeyException("Error copying keys")
        logger.info("Successfully copied public keys.")

        if not ssh_obj.copy_private_key_from_host(ip, password):
            raise SSHKeyException("Error copying keys")
        ssh_obj.create_authorized_key_file()
        logger.info("Successfully copied private keys.")

        out, _err = ssh_obj.exec_command(
            ip,
            "python {}".format(config_api.get_cluster_status_for_join_path()))

        # Status codes printed by the remote script that forbid a replace.
        status_errors = {
            -1: "core_deploy_replace_mon_not_healthy_err",
            0: "core_deploy_replace_cluster_in_progress_err",
            1: "core_deploy_replace_two_management_node_down_err",
            3: "core_deploy_replace_cluster_running_err",
        }
        status_code = int(out)
        if status_code in status_errors:
            raise ReplaceException(status_errors[status_code])

        # Create the target directory only when it is missing. The former
        # `os.listdir` test raised on a missing directory and made
        # `os.makedirs` fail on an existing empty one.
        cluster_info_dir = os.path.dirname(cluster_info_path)
        if not os.path.exists(cluster_info_dir):
            os.makedirs(cluster_info_dir)

        logger.info("Starting to copy config file")
        if not ssh_obj.copy_file_from_host(ip, cluster_info_path):
            raise Exception("Error copying  config file")

        logger.info("Successfully copied config file.")
        cluster_name = config.get_cluster_name(True)
        logger.info("Successfully joined to cluster {}".format(cluster_name))

        local_name = config.get_node_name()
        local_ip = Network().get_node_management_ip()

        wrong_name = True
        wrong_ip = True
        for node_info in config.get_management_nodes_config():
            name_matches = node_info.name == local_name
            ip_matches = node_info.management_ip == local_ip
            if not (name_matches or ip_matches):
                continue

            # Only the first entry matching the name or the IP is considered.
            wrong_name = not name_matches
            wrong_ip = not ip_matches
            if name_matches and ip_matches:
                config.set_node_info(node_info, True)
                # Empty marker file; `build` checks for its existence.
                open(config_api.get_replace_file_path(), 'w+').close()
            break

        if wrong_name or wrong_ip:
            # The copied cluster info does not describe this node: discard it.
            os.remove(cluster_info_path)
            if wrong_name and wrong_ip:
                raise ReplaceException(
                    "core_deploy_replace_node_do_not_match_err")
            if wrong_name:
                raise ReplaceException(
                    "core_deploy_replace_node_do_not_match_name_err")
            raise ReplaceException(
                "core_deploy_replace_node_do_not_match_ip_err")

        config.set_password(password)
        logger.info("password set successfully.")
        self.__copy_current_tunings(ip)
        return cluster_name
Exemple #8
0
    def build(self):
        """
        Run the deployment step that matches the number of management nodes
        currently listed in the cluster info.

        * 0 nodes: register this node as the first management node.
        * 1 node: register this node as the second management node and sync
          the cluster info file to the other management node.
        * 2 nodes: build the cluster (consul, monitors, OSDs), commit the
          management nodes, start services and sync the cluster info file.
        * 3 nodes, no replace marker file: join this node to the cluster.
        * 3 nodes, replace marker file present: replace a management node.

        A fresh StatusReport is stored on the instance at the start and
        filled with the failed tasks of the steps that did not succeed.

        :return: one of the BuildStatus values ``OneManagementNode``,
            ``TwoManagementNodes``, ``done``, ``done_joined``,
            ``done_replace``, ``connection_error``, ``build_consul_error``,
            ``build_monitors_error``, ``build_osd_error`` or ``error``.
            Any unexpected exception removes the cluster info file (if it
            exists), is logged, and yields ``error``.
        """
        try:
            self.__status_report = StatusReport()
            conf = configuration()

            if len(conf.get_cluster_info().management_nodes) == 0:
                # ------------------------- First node ------------------------- #
                node_num = len(conf.get_cluster_info().management_nodes) + 1
                self.__status_report.nod_num = node_num
                NTPConf().setup_ntp_local()
                # NOTE(review): unlike the other branches, a failed
                # add_management_node() does not return an error status
                # here; the flow continues -- confirm this is intended.
                if conf.add_management_node() != Status().done:
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_cant_add_node")

                logger.info(
                    "Node 1 added, cluster requires 2 other nodes to build.")
                self.run_post_deploy_script()
                return BuildStatus().OneManagementNode

            elif len(conf.get_cluster_info().management_nodes) == 1:
                # ------------------------- Second node ------------------------ #
                node_num = len(conf.get_cluster_info().management_nodes) + 1
                self.__status_report.nod_num = node_num

                connection_status = self.check_connections()
                if not connection_status.success:
                    self.__status_report.failed_tasks.extend(
                        connection_status.failed_tasks)
                    logger.error("Connection ping error.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().connection_error

                NTPConf().setup_ntp_local()

                if conf.add_management_node() != Status().done:
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_cant_add_node")
                    return BuildStatus().error
                if not self.__sync_cluster_config_file():
                    return BuildStatus().error

                logger.info(
                    "Node 2 is added, cluster requires 1 other node to build.")
                self.run_post_deploy_script()
                return BuildStatus().TwoManagementNodes

            elif len(conf.get_cluster_info().management_nodes) == 2:
                # ----------------- Third node: build the cluster --------------- #
                node_num = len(conf.get_cluster_info().management_nodes) + 1
                self.__status_report.nod_num = node_num

                connection_status = self.check_connections()
                if not connection_status.success:
                    self.__status_report.failed_tasks.extend(
                        connection_status.failed_tasks)
                    logger.error("Connection ping error.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().connection_error

                status = self.check_remote_connection()
                if not status.success:
                    self.__status_report = status
                    return BuildStatus().error

                NTPConf().setup_ntp_local()

                logger.info("Stopping petasan services on all nodes.")
                self.stop_petasan_services()
                logger.info("Starting local clean_ceph.")
                clean_ceph()
                logger.info("Starting local clean_consul.")
                clean_consul()

                status = build_consul()
                if not status.success:
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)
                    logger.error("Could not build consul.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_consul_error

                status = build_monitors()
                if not status.success:
                    self.__status_report = status
                    logger.error("Could not build ceph monitors.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_monitors_error

                status = build_osds()
                if not status.success:
                    self.__status_report = status
                    logger.error("Could not build ceph OSDs.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_osd_error
                else:
                    # Success overall, but keep any failed tasks that were
                    # still reported by the OSD build.
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)

                logger.info("Main core components deployed.")

                if not self.__commit_management_nodes():
                    self.__status_report.success = False
                    logger.error("Could not commit node.")
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_commit_node")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().error

                logger.info("Starting all services.")
                self.start_petasan_services()

                if not self.add__node_to_hosts_file():
                    self.__status_report.success = False
                    logger.error("Could not add node to hosts file.")
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_add_node_hosts")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().error

                SharedFS().setup_management_nodes()

                if conf.add_management_node() != Status().done:
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_add_node_config")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().error

                logger.info("Updating rbd pool.")
                if not create_rbd_pool():
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_update_rbd")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().error

                logger.info("Creating EC Profiles.")
                if not create_ec_profiles():
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_create_ec_profiles")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().error

                logger.info(
                    "Waiting for ceph to reach active and clean status.")
                test_active_clean()
                if not self.__sync_cluster_config_file():
                    return BuildStatus().error

                self.run_post_deploy_script()
                self.kill_petasan_console(True)
                logger.info("Node 3 added and cluster is now ready.")
                # Falls through to the final `return BuildStatus().done`.

            elif len(
                    conf.get_cluster_info().management_nodes
            ) == 3 and not os.path.exists(ConfigAPI().get_replace_file_path()):
                # ------------------------------ Join ------------------------------ #
                # ------------------------------------------------------------------ #
                node_num = len(conf.get_cluster_info().management_nodes) + 1
                self.__status_report.nod_num = node_num
                logger.info("Joining node to running cluster.")

                connection_status = self.check_connections()
                if not connection_status.success:
                    self.__status_report.failed_tasks.extend(
                        connection_status.failed_tasks)
                    logger.error("Connection ping error.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().connection_error

                status = self.check_remote_connection()
                NTPConf().setup_ntp_local()

                if not status.success:
                    self.__status_report = status
                    return BuildStatus().error

                logger.info("Stopping petasan services on local node.")
                self.stop_petasan_services(remote=False)
                logger.info("Starting local clean_ceph.")
                clean_ceph_local()
                logger.info("Starting local clean_consul.")
                clean_consul_local()

                status = build_consul_client()
                if not status.success:
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)
                    logger.error("Could not build consul client.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_consul_error

                status = copy_ceph_config_from_mon()
                if not status.success:
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)
                    logger.error("Could not copy ceph config.")
                    logger.error(self.__status_report.failed_tasks)
                    # NOTE(review): a ceph config copy failure is reported
                    # with the consul error status -- confirm with callers.
                    return BuildStatus().build_consul_error

                status = create_osds_local()
                if not status.success:
                    self.__status_report = status
                    logger.error("Could not build ceph OSDs.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_osd_error
                else:
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)

                logger.info("Main core components deployed.")
                logger.info("Staring all services")
                self.start_petasan_services(remote=False)
                test_active_clean()
                # Committing the node is attempted twice, with another
                # test_active_clean() call in between, before giving up.
                if not self.__commit_local_node():
                    test_active_clean()
                    if not self.__commit_local_node():
                        self.__status_report.success = False
                        logger.error("Could not commit node.")
                        self.__status_report.failed_tasks.append(
                            "core_cluster_deploy_couldnt_commit_node_join")
                        logger.error(self.__status_report.failed_tasks)
                        os.remove(ConfigAPI().get_cluster_info_file_path())
                        return BuildStatus().error

                # Same two-attempt scheme for the hosts file update.
                if not self.add__node_to_hosts_file(remote=False):
                    test_active_clean()
                    if not self.add__node_to_hosts_file(remote=False):
                        self.__status_report.success = False
                        logger.error("Could not add node to hosts file.")
                        self.__status_report.failed_tasks.append(
                            "core_cluster_deploy_couldnt_add_node_hosts")
                        logger.error(self.__status_report.failed_tasks)
                        os.remove(ConfigAPI().get_cluster_info_file_path())
                        return BuildStatus().error

                logger.info("Node successfully joined to cluster.")
                self.kill_petasan_console(False)
                if os.path.exists(ConfigAPI().get_replace_file_path()):
                    os.remove(ConfigAPI().get_replace_file_path())

                self.run_post_deploy_script()
                return BuildStatus().done_joined

            elif len(conf.get_cluster_info().management_nodes
                     ) == 3 and os.path.exists(
                         ConfigAPI().get_replace_file_path()):
                # ----------------------------- Replace ---------------------------- #
                # ------------------------------------------------------------------ #
                node_num = len(conf.get_cluster_info().management_nodes) + 1
                self.__status_report.nod_num = node_num
                logger.info("Replace node is starting.")

                connection_status = self.check_connections()
                if not connection_status.success:
                    self.__status_report.failed_tasks.extend(
                        connection_status.failed_tasks)
                    logger.error("Connection ping error.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().connection_error

                status = self.check_remote_connection()
                NTPConf().setup_ntp_local()

                if not status.success:
                    self.__status_report = status
                    return BuildStatus().error

                logger.info("Stopping petasan services on local node.")
                self.stop_petasan_services(remote=False)
                logger.info("Starting clean_ceph.")
                clean_ceph_local()
                logger.info("Starting local clean_consul.")
                clean_consul_local()

                status = replace_consul_leader()
                if not status.success:
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)
                    logger.error("Could not replace consul leader.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_consul_error

                status = replace_local_monitor()
                if not status.success:
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_monitors_error

                status = create_osds_local()
                if not status.success:
                    self.__status_report = status
                    logger.error("Could not build ceph OSDs.")
                    logger.error(self.__status_report.failed_tasks)
                    return BuildStatus().build_osd_error
                else:
                    self.__status_report.failed_tasks.extend(
                        status.failed_tasks)

                logger.info("Main core components deployed.")
                logger.info("Starting all services.")
                self.start_petasan_services(remote=False)
                test_active_clean()

                SharedFS().rebuild_management_node()

                logger.info("Node successfully added to cluster.")
                self.run_post_deploy_script()
                self.kill_petasan_console(False)
                # The replace marker file is no longer needed.
                os.remove(ConfigAPI().get_replace_file_path())
                return BuildStatus().done_replace

        except Exception as ex:
            # Discard the cluster info file when the build fails unexpectedly.
            config_api = ConfigAPI()
            if os.path.exists(config_api.get_cluster_info_file_path()):
                os.remove(config_api.get_cluster_info_file_path())
            # Log the exception object itself: `ex.message` does not exist on
            # Python 3 (and is empty for multi-argument exceptions on
            # Python 2), which would break or blank this handler.
            logger.exception(ex)
            return BuildStatus().error

        return BuildStatus().done