Example No. 1
    def check_connections(self):
        '''
        Ping the management, backend1 and backend2 IPs of every management node.
        :return: a StatusReport listing any ping failures
        '''

        current_cluster_info = configuration().get_cluster_info()
        management_nodes = current_cluster_info.management_nodes
        status_report = StatusReport()
        status_report.success = True

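        # Ping each management node's management, backend1 and backend2 addresses
        # and record a failed task for every address that does not respond.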
        for node in management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(node))
            management_host = node_info.management_ip
            backend1_host = node_info.backend_1_ip
            backend2_host = node_info.backend_2_ip

            if not Network().ping(management_host):
                status_report.failed_tasks.append(
                    'core_deploy_ping_error_node_{}_management'.format(
                        management_nodes.index(node) + 1))
            if not Network().ping(backend1_host):
                status_report.failed_tasks.append(
                    'core_deploy_ping_error_node_{}_backend1'.format(
                        management_nodes.index(node) + 1))
            if not Network().ping(backend2_host):
                status_report.failed_tasks.append(
                    'core_deploy_ping_error_node_{}_backend2'.format(
                        management_nodes.index(node) + 1))
            if len(status_report.failed_tasks) > 0:
                status_report.success = False

        return status_report
Example No. 2
    def __sync_cluster_config_file(self):
        try:
            manage_conf = configuration()
            current_node_name = manage_conf.get_node_info().name
            cluster_info = manage_conf.get_cluster_info()
            config_api = ConfigAPI()

            for i in cluster_info.management_nodes:
                node_info = NodeInfo()
                node_info.load_json(json.dumps(i))

                if node_info.name != current_node_name:
                    ssh_obj = ssh()
                    if not ssh_obj.copy_file_to_host(
                            node_info.management_ip,
                            config_api.get_cluster_info_file_path()):
                        logger.error(
                            "Could not copy configuration file to {} server.".
                            format(node_info.name))
                        self.__status_report.success = False
                        self.__status_report.failed_tasks.append(
                            "core_cluster_deploy_couldnt_sync_config_file")
                        return False

        except Exception as ex:
            logger.exception(ex.message)
            self.__status_report.success = False
            self.__status_report.failed_tasks.append(
                "core_cluster_deploy_couldnt_sync_config_file")
            return False

        # copy_file_to_host
        return True
Example No. 3
    def get_node_info(self):
        config = ConfigAPI()
        with open(config.get_node_info_file_path(), 'r') as f:
            data = json.load(f)
            node = NodeInfo()
            node.load_json(json.dumps(data))
            return node
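Every snippet above hydrates a NodeInfo entity from a plain dict by round-tripping it through json.dumps() and NodeInfo.load_json(). A minimal sketch of that pattern is shown below; the real PetaSAN.core.entity.cluster.NodeInfo is not part of these examples, the fields listed are only the ones the snippets access, and the load_json body is an assumption:

import json

class NodeInfo(object):
    # Hypothetical stand-in for PetaSAN.core.entity.cluster.NodeInfo.
    def __init__(self):
        self.name = ""
        self.management_ip = ""
        self.backend_1_ip = ""
        self.backend_2_ip = ""

    def load_json(self, json_str):
        # Assumed behavior: copy matching keys from the decoded JSON onto the entity.
        for key, value in json.loads(json_str).items():
            if hasattr(self, key):
                setattr(self, key, value)

# Typical usage, mirroring the loops in the examples:
# node_info = NodeInfo()
# node_info.load_json(json.dumps(cluster_node_dict))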
Example No. 4
def get_security_key_():
    # get the security code from the server we're connecting to
    ssh_exec = ssh()

    conf = configuration()
    cluster_info = conf.get_cluster_info()

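    # Skip the local node, ask each remote management node to print the Consul
    # encryption key, and return the first non-empty key that comes back.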
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))
        if remote_node_info.management_ip == conf.get_node_info(
        ).management_ip:
            continue
        command_result, err = ssh_exec.exec_command(
            remote_node_info.management_ip,
            'python ' + ConfigAPI().get_consul_encryption_key_script())

        if err is not None and str(err) != "":
            logger.error("Could not read Consul encryption key from node: " +
                         remote_node_info.management_ip)
            logger.error(err)
            print('command_result: ', command_result)
        else:
            key = str(command_result.splitlines()[0])
            if key is not None and key != "":
                return key
    return None
Example No. 5
    def get_remote_ips(self, current_node_name):

        current_cluster_info = configuration().get_cluster_info()
        remote_mons_ips = []
        for i in current_cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            if current_node_name != node_info.name:
                remote_mons_ips.append(node_info.management_ip)
        return remote_mons_ips
Example No. 6
def __test_leaders():
    sleeps = [15, 15, 10, 10, 5, 5]
    tries = 5

    leaders_in_cluster = []
    cluster_members = []

    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()

    current_node_info = cluster_conf.get_node_info()
    cluster_members.append(current_node_info.name)

    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        cluster_members.append(node_info.name)

    status_report = StatusReport()

    for host in cluster_members:
        while tries:
            status = None
            try:
                status = _leader_status_check_(host)
            except Exception as exc:
                logger.error("Error Connecting to consul for leader check.")
            # if not has_reached_quorum:
            if not status:
                tries -= 1
                sleep_seconds = sleeps.pop()
                logger.warning('waiting %s seconds before retrying',
                               sleep_seconds)
                # time.sleep(sleep_seconds)
                sleep(sleep_seconds)
                status_report.success = False
            else:
                leaders_in_cluster.append(host)
                logger.info('Cluster Node {} joined the cluster and is alive'
                            .format(host))
                status_report.success = True
                break
        if status_report.success is False:
            status_report.failed_tasks.append(
                'core_consul_deploy_build_node_fail_join_cluster_not_alive' +
                "%" + str(host))
    if leaders_in_cluster == cluster_members:
        logger.info("Consul leaders are ready")
        status_report.success = True
        return status_report

    else:
        logger.error("Consul leaders are not ready")
        return status_report
Example No. 7
    def get_management_nodes_config(self):

        """
        :rtype: [NodeInfo]
        """

        nodes = []
        for i in configuration().get_cluster_info().management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            nodes.append(node_info)
        return nodes
Example No. 8
    def get_remote_nodes_config(self, current_node_name):

        """
        :rtype: [NodeInfo]
        """
        current_cluster_info = configuration().get_cluster_info()
        remote_nodes = []
        for i in current_cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            if current_node_name != node_info.name:
                remote_nodes.append(node_info)
        return remote_nodes
Example No. 9
    def is_node_in_cluster_config(self):

        """
        :rtype : [NodeInfo]
        """
        current_cluster_info = configuration().get_cluster_info()
        current_node_name = configuration().get_node_name()
        for i in current_cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            if current_node_name == node_info.name:
                return True
        return False
Example No. 10
def __create_leader_conf_remotely(key_gen, cluster_info, local_node_info):
    ssh_exec = ssh()
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))
        if local_node_info.backend_1_ip != remote_node_info.backend_1_ip:
            command_result = ssh_exec.call_command(
                remote_node_info.backend_1_ip,
                'python ' + ConfigAPI().get_consul_create_conf_script() +
                ' -key="' + key_gen + '"')
            if command_result is False:
                logger.error(
                    "Could not create Consul Configuration on node: " +
                    remote_node_info.backend_1_ip)
                return command_result
    return True
Example No. 11
def test_set_Node():
    from PetaSAN.core.cluster.configuration import configuration
    from PetaSAN.backend.cluster.manage_node import ManageNode
    from PetaSAN.backend.cluster.deploy import Wizard
    from PetaSAN.core.cluster.network import Network
    net = Network()
    wizerd = Wizard()
    conf = configuration()

    node = NodeInfo()
    m_node = ManageNode()
    node.backend_1_ip = "192.168.130.100"
    node.backend_2_ip = "192.168.120.100"
    node.management_ip = net.get_node_management_ip()
    #clu= conf.get_cluster_info()
    #clu.management_nodes.append(node)
    #conf.set_cluster_network_info(clu)
    print(wizerd.set_node_info(node))
Example No. 12
def __start_leader_remotely(cluster_info, local_node_info):
    logger.info('Start consul leaders remotely.')
    ssh_exec = ssh()
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))

        logger.debug('local_node_info.backend_1_ip: ' +
                     local_node_info.backend_1_ip)
        logger.debug('remote_node_info.backend_1_ip: ' +
                     remote_node_info.backend_1_ip)

        if local_node_info.backend_1_ip != remote_node_info.backend_1_ip:
            logger.debug(
                'Sending: ' + 'python ' +
                ConfigAPI().get_consul_start_up_script_path() +
                ' -retry-join {} '.format(local_node_info.backend_1_ip))

            ssh_exec.exec_command(
                remote_node_info.backend_1_ip,
                'python ' + ConfigAPI().get_consul_start_up_script_path() +
                ' -retry-join {} '.format(local_node_info.backend_1_ip))
    return
Example No. 13
def __test_mons():
    sleeps = [15, 15, 10, 10, 5, 5]
    tries = 5
    mon_in_quorum = []
    mon_members = []

    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()

    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        mon_members.append(node_info.name)

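    # Wait for every monitor to appear in quorum, retrying with back-off delays
    # popped from the sleeps list.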
    for host in mon_members:
        while tries:
            status = mon_status_check()
            has_reached_quorum = host in status.get('quorum_names', '')

            if not has_reached_quorum:
                tries -= 1
                sleep_seconds = sleeps.pop()
                logger.warning('Waiting %s seconds before retrying',
                               sleep_seconds)
                time.sleep(sleep_seconds)
            else:
                mon_in_quorum.append(host)
                break

    if mon_in_quorum == mon_members:
        logger.info("Ceph monitors are ready.")
        return True

    else:
        logger.info("Ceph monitors are not ready.")
        return False
Example No. 14

from PetaSAN.core.cluster.configuration import configuration
from PetaSAN.core.common.cmd import *
from PetaSAN.core.entity.cluster import NodeInfo
import json
from PetaSAN.core.config.api import ConfigAPI

config = configuration()
node = config.get_node_info()
cluster = config.get_cluster_info()

conf = configuration()
cluster_info = conf.get_cluster_info()

first_cluster_node = cluster_info.management_nodes[0]
first_node_info = NodeInfo()
first_node_info.load_json(json.dumps(first_cluster_node))

second_cluster_node = cluster_info.management_nodes[1]
second_node_info = NodeInfo()
second_node_info.load_json(json.dumps(second_cluster_node))

call_cmd('python ' + ConfigAPI().get_consul_start_up_script_path() +
         ' " -retry-join 192.168.17.14"')
Example No. 15
def build_monitors():
    cluster_name = configuration().get_cluster_name()
    ceph_mon_keyring = ConfigAPI().get_ceph_mon_keyring(cluster_name)
    ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)
    status = StatusReport()

    try:
        _fsid = uuid.uuid4()

        content = "[global]\n\
fsid = {fsid}\n\
mon_host = {mon_host}\n\
\n\
public_network = {public_network}\n\
cluster_network = {cluster_network}\n\
\n"

        cluster_config = configuration()
        current_node_info = cluster_config.get_node_info()

        current_node_name = current_node_info.name
        current_cluster_info = cluster_config.get_cluster_info()

        config_api = ConfigAPI()
        mon_hosts_backend_ip = []
        remote_mons_management_ips = []

        for i in current_cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            mon_hosts_backend_ip.append(node_info.backend_1_ip)
            if current_node_name != node_info.name:
                remote_mons_management_ips.append(node_info.management_ip)

        if not os.path.exists(config_api.get_cluster_ceph_dir_path()):
            os.makedirs(os.path.dirname(
                config_api.get_cluster_ceph_dir_path()))

        with open(
                config_api.get_cluster_ceph_dir_path() +
                "{}.conf".format(cluster_name),
                'w',
        ) as f:
            f.write(
                content.format(
                    fsid=_fsid,
                    public_network=str(
                        current_cluster_info.backend_1_base_ip) + "/" +
                    __get_net_size(str(current_cluster_info.backend_1_mask)),
                    cluster_network=str(
                        current_cluster_info.backend_2_base_ip) + "/" +
                    __get_net_size(str(current_cluster_info.backend_2_mask)),
                    mon_initial=cluster_config.get_node_name(),
                    mon_host=cluster_config.get_node_info().backend_1_ip +
                    ',' + ','.join(mon_hosts_backend_ip)) +
                cluster_config.get_ceph_tunings() + "\n")

        if not call_cmd(
                "ceph-authtool --create-keyring /tmp/{} --gen-key -n mon. --cap mon 'allow *'"
                .format(ceph_mon_keyring)):
            logger.error(
                "ceph-authtool --create-keyring for mon returned error")
            status.success = False

        # elif not call_cmd("".join(["ceph-authtool --create-keyring {}".format(ceph_client_admin_keyring),
        #                    " --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow'"])) :
        # Nautilius remove --set-uid=0

        elif not call_cmd("".join([
                "ceph-authtool --create-keyring {}".format(
                    ceph_client_admin_keyring),
                " --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow'"
        ])):
            logger.error(
                "ceph-authtool --create-keyring for admin returned error")
            status.success = False

        elif not call_cmd("ceph-authtool /tmp/{} --import-keyring {}".format(
                ceph_mon_keyring, ceph_client_admin_keyring)):
            logger.error("ceph-authtool --import-keyring returned error")
            status.success = False

        elif not call_cmd(
                "monmaptool --create --add {} {} --fsid {} /tmp/monmap".format(
                    cluster_config.get_node_name(),
                    cluster_config.get_node_info().backend_1_ip, _fsid)):
            logger.error("monmaptool --create --add returned error")
            status.success = False

        if not os.path.exists("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name)):
            os.makedirs("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name))

        if not status.success or not call_cmd(
                "ceph-mon --cluster {} --mkfs -i {} --monmap /tmp/monmap --keyring /tmp/{}"
                .format(cluster_name, current_node_name, ceph_mon_keyring)):
            logger.error("ceph-mon --mkfs --add returned error")
            status.success = False

        open(
            "/var/lib/ceph/mon/{}-{}/done".format(cluster_name,
                                                  current_node_name),
            'w+').close()
        open(
            "/var/lib/ceph/mon/{}-{}/systemd".format(cluster_name,
                                                     current_node_name),
            'w+').close()

        call_cmd("chown -R ceph:ceph /var/lib/ceph/mon")

        call_cmd("systemctl enable ceph.target ")
        call_cmd("systemctl enable ceph-mon.target ")
        call_cmd("systemctl enable ceph-mon@{} ".format(current_node_name))
        if not status.success or not call_cmd(
                "systemctl start ceph-mon@{}  ".format(current_node_name)):
            status.success = False

        if not status.success:
            status.failed_tasks.append(
                "Create ceph mon on {} returned error.".format(
                    current_node_name))
            return status

        logger.info("First monitor started successfully")

        # create local manager :
        call_cmd('/opt/petasan/scripts/create_mgr.py')

        logger.info("Starting to deploy remote monitors")

        # call_cmd("ceph-create-keys --cluster {} -i {}  ".format(cluster_name,current_node_name))
        # Nautilius copy bootstrap-osd ourselves
        if not os.path.exists("/var/lib/ceph/bootstrap-osd/"):
            os.makedirs("/var/lib/ceph/bootstrap-osd/")
            call_cmd(
                'ceph auth get client.bootstrap-osd > /var/lib/ceph/bootstrap-osd/ceph.keyring'
            )

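        # Push the admin keyring, the cluster conf and the bootstrap-osd keyring to
        # each remote management node, then run its create-mon script over ssh.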
        for remote_mon in remote_mons_management_ips:
            ssh_obj = ssh()
            if not ssh_obj.copy_file_to_host(
                    remote_mon, "{}".format(ceph_client_admin_keyring)):
                logger.error("Cannot copy {} to {}".format(
                    ceph_client_admin_keyring, remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, "/etc/ceph/{}.conf".format(cluster_name)):
                logger.error("Cannot copy ceph.conf to {}".format(remote_mon))
                status.success = False
            elif not ssh_obj.call_command(
                    remote_mon, " python {} ".format(
                        config_api.get_node_create_mon_script_path())):
                logger.error("Cannot create monitor on remote node {}".format(
                    remote_mon))
                status.success = False

            # Nautilius copy bootstrap-osd ourselves :
            elif not ssh_obj.call_command(
                    remote_mon, 'mkdir -p /var/lib/ceph/bootstrap-osd'):
                logger.error(
                    "Cannot create bootstrap-osd dir on remote node {}".format(
                        remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, '/var/lib/ceph/bootstrap-osd/ceph.keyring'):
                logger.error("Cannot copy bootstrap-osd keyring to {}".format(
                    remote_mon))
                status.success = False

            if not status.success:
                status.failed_tasks.append(
                    "core_cluster_deploy_monitor_create_err" + "%" +
                    remote_mon)
                return status
        if not __test_mons():
            status.success = False
            status.failed_tasks.append("core_cluster_deploy_monitors_down_err")
            return status

        # Nautilius enable msgr2 :
        call_cmd('ceph mon enable-msgr2')

    except Exception as ex:
        status.success = False
        logger.exception(ex.message)
        status.failed_tasks.append(
            "core_cluster_deploy_monitor_exception_occurred" + "%" +
            current_node_name)
        return status

    status.success = True
    return status
Example No. 16
def new():
    conf = configuration()
    current_node_name = conf.get_node_info().name
    clu = conf.get_cluster_info()

    logger.info('Creating new cluster named %s', clu.name)
    cfg = CephConf()
    cfg.add_section('global')

    fsid = uuid.uuid4()
    cfg.set('global', 'fsid', str(fsid))


    # if networks were passed in, lets set them in the
    # global section

    cfg.set('global', 'public network', str(clu.backend_1_base_ip)+"/"+get_net_size(str(clu.backend_1_mask)))

    cfg.set('global', 'cluster network', str(clu.backend_2_base_ip)+"/"+get_net_size(str(clu.backend_2_mask)))

    mon_initial_members = []
    mon_host = []



    config_api = ConfigAPI()
    for i in clu.management_nodes:
        node_info=NodeInfo()
        node_info.load_json(json.dumps(i))
        mon_initial_members.append(node_info.name)
        mon_host.append(node_info.backend_1_ip)






    cfg.set('global', 'mon initial members', ', '.join(mon_initial_members))
    # no spaces here, see http://tracker.newdream.net/issues/3145
    cfg.set('global', 'mon host', ','.join(mon_host))

    # override undesirable defaults, needed until bobtail

    # http://tracker.ceph.com/issues/6788
    cfg.set('global', 'auth cluster required', 'cephx')
    cfg.set('global', 'auth service required', 'cephx')
    cfg.set('global', 'auth client required', 'cephx')

    cfg.set('global', 'mon clock drift allowed', '.300')
    cfg.set('global', 'osd pool default size', '2')
    cfg.set('global', 'max open files', '131072')

    # http://tracker.newdream.net/issues/3138
    cfg.set('global', 'filestore xattr use omap', 'true')

    path = '{name}.conf'.format(
        name=clu.name,
        )

    new_mon_keyring(clu.name)

    logger.info('Writing initial config to %s...', path)
    tmp = '%s.tmp' % path
    with open(tmp, 'w') as f:
        cfg.write(f)
    try:
        os.rename(tmp, path)
    except OSError as e:
        raise
Example No. 17
parser = argparse.ArgumentParser(
    description='This is a script that will start up the configured consul cluster.')
parser.add_argument('-retry-join', '--retry-join', help='', required=False)
args = parser.parse_args()

retry_join_arg = args.retry_join
# print('args.retry_join: ', retry_join_arg)

config = configuration()
node = config.get_node_info()
cluster_info = config.get_cluster_info()

retry_join = ''

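# Build one '-retry-join <backend_1_ip>' flag for every other management node.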
for cluster_node in cluster_info.management_nodes:
    remote_node_info = NodeInfo()
    remote_node_info.load_json(json.dumps(cluster_node))

    if remote_node_info.backend_1_ip != node.backend_1_ip:
        retry_join = retry_join + ' -retry-join ' + remote_node_info.backend_1_ip

str_start_command = "consul agent -raft-protocol 2 -config-dir /opt/petasan/config/etc/consul.d/server -bind {} ".format(
    node.backend_1_ip) + retry_join
# print('A: ', str_start_command)

if retry_join_arg is not None:
    # if str(retry_join).find(str(retry_join_arg)) == -1:
    # print('str_start_command: str_start_command: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>', str_start_command)
    str_start_command = str_start_command + " -retry-join " + retry_join_arg

logger.info('str_start_command: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ' +