def check_connections(self):
    """Ping the management, backend1 and backend2 IPs of every management node.

    :return: StatusReport; failed_tasks gets one
             'core_deploy_ping_error_node_<n>_<interface>' entry per
             unreachable address, and success is False if any ping failed.
    """
    management_nodes = configuration().get_cluster_info().management_nodes
    status_report = StatusReport()
    status_report.success = True
    net = Network()  # one Network helper, not a new one per ping
    # enumerate gives a stable 1-based node number; the original used
    # management_nodes.index(node), which is O(n) per lookup and returns
    # the wrong index when two nodes serialize identically.
    for node_number, node in enumerate(management_nodes, 1):
        node_info = NodeInfo()
        node_info.load_json(json.dumps(node))
        checks = [('management', node_info.management_ip),
                  ('backend1', node_info.backend_1_ip),
                  ('backend2', node_info.backend_2_ip)]
        for interface, ip in checks:
            if not net.ping(ip):
                status_report.failed_tasks.append(
                    'core_deploy_ping_error_node_{}_{}'.format(
                        node_number, interface))
    if len(status_report.failed_tasks) > 0:
        status_report.success = False
    return status_report
def __sync_cluster_config_file(self):
    """Copy the cluster configuration file to every other management node.

    Pushes the local cluster-info file over ssh to each management node
    whose name differs from the current node's.  On the first copy
    failure, or on any exception, the shared status report is marked
    failed and False is returned.

    :return: True when the file reached all remote nodes, else False.
    """
    try:
        manage_conf = configuration()
        current_node_name = manage_conf.get_node_info().name
        cluster_info = manage_conf.get_cluster_info()
        config_api = ConfigAPI()
        for i in cluster_info.management_nodes:
            # round-trip through json to turn the raw dict into a NodeInfo
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            if node_info.name != current_node_name:
                ssh_obj = ssh()
                if not ssh_obj.copy_file_to_host(
                        node_info.management_ip,
                        config_api.get_cluster_info_file_path()):
                    logger.error(
                        "Could not copy configuration file to {} server.".
                        format(node_info.name))
                    self.__status_report.success = False
                    self.__status_report.failed_tasks.append(
                        "core_cluster_deploy_couldnt_sync_config_file")
                    # stop at the first unreachable node
                    return False
    except Exception as ex:
        # NOTE(review): ex.message is Python 2 only; this module targets
        # Python 2 (see the print statements elsewhere in the file).
        logger.exception(ex.message)
        self.__status_report.success = False
        self.__status_report.failed_tasks.append(
            "core_cluster_deploy_couldnt_sync_config_file")
        return False
    # copy_file_to_host
    return True
def get_node_info(self):
    """Read the local node-info file and return it as a NodeInfo object."""
    info_path = ConfigAPI().get_node_info_file_path()
    with open(info_path, 'r') as info_file:
        raw = json.load(info_file)
    node = NodeInfo()
    node.load_json(json.dumps(raw))
    return node
def get_security_key_():
    """Fetch the Consul encryption key from a peer management node.

    Runs the consul encryption-key script over ssh on each management
    node other than the local one and returns the first non-empty key.

    :return: the key string, or None if no node produced one.
    """
    ssh_exec = ssh()
    conf = configuration()
    cluster_info = conf.get_cluster_info()
    # Hoisted: the local management ip is loop-invariant; the original
    # re-read the node info file on every iteration.
    local_management_ip = conf.get_node_info().management_ip
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))
        if remote_node_info.management_ip == local_management_ip:
            continue
        command_result, err = ssh_exec.exec_command(
            remote_node_info.management_ip,
            'python ' + ConfigAPI().get_consul_encryption_key_script())
        if err is not None and str(err) != "":
            logger.error("Could not read Consul encryption key from node: " +
                         remote_node_info.management_ip)
            logger.error(err)
            print('command_result: ', command_result)
        else:
            # guard against empty output: the original indexed
            # splitlines()[0] unconditionally and could raise IndexError
            lines = command_result.splitlines()
            if lines:
                key = str(lines[0])
                if key != "":  # str() never yields None, only check emptiness
                    return key
    return None
def get_remote_ips(self, current_node_name):
    """Return the management IPs of all management nodes except the named one."""
    cluster_info = configuration().get_cluster_info()
    remote_ips = []
    for raw_node in cluster_info.management_nodes:
        info = NodeInfo()
        info.load_json(json.dumps(raw_node))
        if info.name != current_node_name:
            remote_ips.append(info.management_ip)
    return remote_ips
def __test_leaders():
    """Verify every expected consul cluster member reports a leader.

    Each host is polled up to a handful of times with a decreasing
    back-off between attempts before being declared failed.

    :return: StatusReport; success is True only when every member passed
             the leader check.
    """
    retry_sleeps = [15, 15, 10, 10, 5, 5]
    max_tries = 5
    leaders_in_cluster = []
    cluster_members = []
    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()
    current_node_info = cluster_conf.get_node_info()
    cluster_members.append(current_node_info.name)
    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        cluster_members.append(node_info.name)
    status_report = StatusReport()
    for host in cluster_members:
        # Reset the retry budget per host: the original shared a single
        # tries counter and sleeps list across all hosts, so once an
        # early host exhausted them, later hosts were never checked and
        # sleeps.pop() could raise IndexError.
        tries = max_tries
        sleeps = list(retry_sleeps)
        while tries:
            status = None
            try:
                status = _leader_status_check_(host)
            except Exception:
                logger.error("Error Connecting to consul for leader check.")
            # if not has_reached_quorum:
            if not status:
                tries -= 1
                sleep_seconds = sleeps.pop() if sleeps else 5
                logger.warning('waiting %s seconds before retrying',
                               sleep_seconds)
                sleep(sleep_seconds)
                status_report.success = False
            else:
                leaders_in_cluster.append(host)
                # original appended host instead of filling the {} placeholder
                logger.info(
                    'Cluster Node {} joined the cluster and is alive'.format(
                        host))
                status_report.success = True
                break
        if status_report.success is False:
            status_report.failed_tasks.append(
                'core_consul_deploy_build_node_fail_join_cluster_not_alive' +
                "%" + str(host))
    if leaders_in_cluster == cluster_members:
        logger.info("Consul leaders are ready")
        status_report.success = True
        return status_report
    else:
        logger.error("Consul leaders are not ready")
        return status_report
def get_management_nodes_config(self):
    """Return every management node as a NodeInfo object.

    :rtype : [NodeInfo]
    """
    result = []
    for raw_node in configuration().get_cluster_info().management_nodes:
        info = NodeInfo()
        info.load_json(json.dumps(raw_node))
        result.append(info)
    return result
def get_remote_nodes_config(self, current_node_name):
    """Return NodeInfo objects for every management node except the named one.

    :rtype : [NodeInfo]
    """
    remote_nodes = []
    for raw_node in configuration().get_cluster_info().management_nodes:
        info = NodeInfo()
        info.load_json(json.dumps(raw_node))
        if info.name != current_node_name:
            remote_nodes.append(info)
    return remote_nodes
def is_node_in_cluster_config(self):
    """Check whether the local node is listed in the cluster configuration.

    :rtype : bool
    """
    # single configuration() instance; the original built it twice, and
    # its docstring wrongly claimed a [NodeInfo] return type.
    conf = configuration()
    current_cluster_info = conf.get_cluster_info()
    current_node_name = conf.get_node_name()
    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        if current_node_name == node_info.name:
            return True
    return False
def __create_leader_conf_remotely(key_gen, cluster_info, local_node_info):
    """Create the consul server configuration on each remote management node.

    Runs the consul create-conf script (passing the encryption key) over
    ssh on every node whose backend-1 ip differs from the local node's.
    Stops at the first failure.

    :return: True on success, otherwise the failing call's result (False).
    """
    ssh_exec = ssh()
    create_conf_cmd = ('python ' + ConfigAPI().get_consul_create_conf_script()
                       + ' -key="' + key_gen + '"')
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))
        if local_node_info.backend_1_ip == remote_node_info.backend_1_ip:
            continue  # skip the local node itself
        command_result = ssh_exec.call_command(remote_node_info.backend_1_ip,
                                               create_conf_cmd)
        if command_result is False:
            logger.error("Could not create Consul Configuration on node: " +
                         remote_node_info.backend_1_ip)
            return command_result
    return True
def test_set_Node():
    # Manual smoke test: build a NodeInfo with hard-coded backend IPs and
    # the detected local management ip, then push it through the deploy
    # wizard's set_node_info().
    from PetaSAN.core.cluster.configuration import configuration
    from PetaSAN.backend.cluster.manage_node import ManageNode
    from PetaSAN.backend.cluster.deploy import Wizard
    from PetaSAN.core.cluster.network import Network
    net = Network()
    wizerd = Wizard()
    conf = configuration()
    node = NodeInfo()
    m_node = ManageNode()
    node.backend_1_ip = "192.168.130.100"
    node.backend_2_ip = "192.168.120.100"
    node.management_ip = net.get_node_management_ip()
    #clu= conf.get_cluster_info()
    #clu.management_nodes.append(node)
    #conf.set_cluster_network_info(clu)
    # Python 2 print statement: this module targets Python 2.
    print wizerd.set_node_info(node)
def __start_leader_remotely(cluster_info, local_node_info):
    """Launch the consul start-up script on every remote management node.

    Each remote node (identified by a backend-1 ip different from the
    local node's) is told to retry-join the local node's backend-1 ip.
    """
    logger.info('Start consul leaders remotely.')
    ssh_exec = ssh()
    for cluster_node in cluster_info.management_nodes:
        remote_node_info = NodeInfo()
        remote_node_info.load_json(json.dumps(cluster_node))
        logger.debug('local_node_info.backend_1_ip: ' +
                     local_node_info.backend_1_ip)
        logger.debug('remote_node_info.backend_1_ip: ' +
                     remote_node_info.backend_1_ip)
        if local_node_info.backend_1_ip == remote_node_info.backend_1_ip:
            continue  # the local node is started elsewhere
        start_cmd = ('python ' + ConfigAPI().get_consul_start_up_script_path()
                     + ' -retry-join {} '.format(local_node_info.backend_1_ip))
        logger.debug('Sending: ' + start_cmd)
        ssh_exec.exec_command(remote_node_info.backend_1_ip, start_cmd)
    return
def __test_mons():
    """Wait until every management node's ceph monitor joins the quorum.

    Each monitor is polled up to a handful of times with decreasing
    back-off between attempts.

    :return: True if all monitors reached quorum, else False.
    """
    retry_sleeps = [15, 15, 10, 10, 5, 5]
    max_tries = 5
    mon_in_quorum = []
    mon_members = []
    cluster_conf = configuration()
    current_cluster_info = cluster_conf.get_cluster_info()
    for i in current_cluster_info.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        mon_members.append(node_info.name)
    for host in mon_members:
        # Reset the retry budget per host: the original shared a single
        # tries counter and sleeps list across hosts, so once the first
        # slow monitor exhausted them, later hosts were never polled and
        # sleeps.pop() could raise IndexError.
        tries = max_tries
        sleeps = list(retry_sleeps)
        while tries:
            status = mon_status_check()
            has_reached_quorum = host in status.get('quorum_names', '')
            if not has_reached_quorum:
                tries -= 1
                sleep_seconds = sleeps.pop() if sleeps else 5
                logger.warning('Waiting %s seconds before retrying',
                               sleep_seconds)
                time.sleep(sleep_seconds)
            else:
                mon_in_quorum.append(host)
                break
    if mon_in_quorum == mon_members:
        logger.info("Ceph monitors are ready.")
        return True
    else:
        logger.info("Ceph monitors are not ready.")
        return False
as published by the Free Software Foundation This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. ''' from PetaSAN.core.cluster.configuration import configuration from PetaSAN.core.common.cmd import * from PetaSAN.core.entity.cluster import NodeInfo import json from PetaSAN.core.config.api import ConfigAPI config = configuration() node = config.get_node_info() cluster = config.get_cluster_info() conf = configuration() cluster_info = conf.get_cluster_info() first_cluster_node = cluster_info.management_nodes[0] first_node_info = NodeInfo() first_node_info.load_json(json.dumps(first_cluster_node)) second_cluster_node = cluster_info.management_nodes[1] second_node_info = NodeInfo() second_node_info.load_json(json.dumps(second_cluster_node)) call_cmd('python' + ConfigAPI().get_consul_start_up_script_path() + ' " -retry-join 192.168.17.14"')
def build_monitors():
    """Bootstrap the first ceph monitor locally, then deploy remote monitors.

    Generates an fsid and a ceph.conf, creates the mon and client.admin
    keyrings and the initial monmap, starts the local monitor and
    manager, then copies the config/keyrings to each remote management
    node and creates its monitor over ssh.

    :return: StatusReport describing success or the failed tasks.
    """
    cluster_name = configuration().get_cluster_name()
    ceph_mon_keyring = ConfigAPI().get_ceph_mon_keyring(cluster_name)
    ceph_client_admin_keyring = ConfigAPI().get_ceph_keyring_path(cluster_name)
    status = StatusReport()
    try:
        _fsid = uuid.uuid4()
        # Template for the generated ceph.conf [global] section.
        # NOTE(review): backslash continuations inside the literal keep
        # each conf key on its own line in the output.
        content = "[global]\n\
fsid = {fsid}\n\
mon_host = {mon_host}\n\
\n\
public_network = {public_network}\n\
cluster_network = {cluster_network}\n\
\n"
        cluster_config = configuration()
        current_node_info = cluster_config.get_node_info()
        current_node_name = current_node_info.name
        current_cluster_info = cluster_config.get_cluster_info()
        config_api = ConfigAPI()
        mon_hosts_backend_ip = []
        remote_mons_management_ips = []
        for i in current_cluster_info.management_nodes:
            node_info = NodeInfo()
            node_info.load_json(json.dumps(i))
            mon_hosts_backend_ip.append(node_info.backend_1_ip)
            if current_node_name != node_info.name:
                remote_mons_management_ips.append(node_info.management_ip)
        if not os.path.exists(config_api.get_cluster_ceph_dir_path()):
            os.makedirs(os.path.dirname(
                config_api.get_cluster_ceph_dir_path()))
        # Write <ceph-dir>/<cluster>.conf from the template above.
        # NOTE(review): mon_initial is passed but has no placeholder in
        # the template; str.format silently ignores extra kwargs.
        with open(
                config_api.get_cluster_ceph_dir_path() +
                "{}.conf".format(cluster_name),
                'w',
        ) as f:
            f.write(
                content.format(
                    fsid=_fsid,
                    public_network=str(
                        current_cluster_info.backend_1_base_ip) + "/" +
                    __get_net_size(str(current_cluster_info.backend_1_mask)),
                    cluster_network=str(
                        current_cluster_info.backend_2_base_ip) + "/" +
                    __get_net_size(str(current_cluster_info.backend_2_mask)),
                    mon_initial=cluster_config.get_node_name(),
                    mon_host=cluster_config.get_node_info().backend_1_ip +
                    ',' + ','.join(mon_hosts_backend_ip)) +
                cluster_config.get_ceph_tunings() + "\n")
        # Build keyrings and the initial monmap; any failure marks the
        # report failed and skips the remaining elif steps.
        if not call_cmd(
                "ceph-authtool --create-keyring /tmp/{} --gen-key -n mon. --cap mon 'allow *'"
                .format(ceph_mon_keyring)):
            logger.error(
                "ceph-authtool --create-keyring for mon returned error")
            status.success = False
        # elif not call_cmd("".join(["ceph-authtool --create-keyring {}".format(ceph_client_admin_keyring),
        # " --gen-key -n client.admin --set-uid=0 --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow'"])) :
        # Nautilius remove --set-uid=0
        elif not call_cmd("".join([
                "ceph-authtool --create-keyring {}".format(
                    ceph_client_admin_keyring),
                " --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow'"
        ])):
            logger.error(
                "ceph-authtool --create-keyring for admin returned error")
            status.success = False
        elif not call_cmd("ceph-authtool /tmp/{} --import-keyring {}".format(
                ceph_mon_keyring, ceph_client_admin_keyring)):
            logger.error("ceph-authtool --import-keyring returned error")
            status.success = False
        elif not call_cmd(
                "monmaptool --create --add {} {} --fsid {} /tmp/monmap".format(
                    cluster_config.get_node_name(),
                    cluster_config.get_node_info().backend_1_ip, _fsid)):
            logger.error("monmaptool --create --add returned error")
            status.success = False
        if not os.path.exists("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name)):
            os.makedirs("/var/lib/ceph/mon/{}-{}".format(
                cluster_name, current_node_name))
        # Create the local monitor's filesystem (skipped if already failed).
        if not status.success or not call_cmd(
                "ceph-mon --cluster {} --mkfs -i {} --monmap /tmp/monmap --keyring /tmp/{}"
                .format(cluster_name, current_node_name, ceph_mon_keyring)):
            logger.error("ceph-mon --mkfs --add returned error")
            status.success = False
        # Marker files telling ceph the mon is initialized and systemd-managed.
        open(
            "/var/lib/ceph/mon/{}-{}/done".format(cluster_name,
                                                  current_node_name),
            'w+').close()
        open(
            "/var/lib/ceph/mon/{}-{}/systemd".format(cluster_name,
                                                     current_node_name),
            'w+').close()
        call_cmd("chown -R ceph:ceph /var/lib/ceph/mon")
        call_cmd("systemctl enable ceph.target ")
        call_cmd("systemctl enable ceph-mon.target ")
        call_cmd("systemctl enable ceph-mon@{} ".format(current_node_name))
        if not status.success or not call_cmd(
                "systemctl start ceph-mon@{} ".format(current_node_name)):
            status.success = False
        if not status.success:
            status.failed_tasks.append(
                "Create ceph mon on {} returned error.".format(
                    current_node_name))
            return status
        logger.info("First monitor started successfully")
        # create local manager :
        call_cmd('/opt/petasan/scripts/create_mgr.py')
        logger.info("Starting to deploy remote monitors")
        # call_cmd("ceph-create-keys --cluster {} -i {} ".format(cluster_name,current_node_name))
        # Nautilius copy bootstrap-osd ourselves
        if not os.path.exists("/var/lib/ceph/bootstrap-osd/"):
            os.makedirs("/var/lib/ceph/bootstrap-osd/")
        call_cmd(
            'ceph auth get client.bootstrap-osd > /var/lib/ceph/bootstrap-osd/ceph.keyring'
        )
        # Push config/keyrings to each remote node and create its monitor;
        # abort on the first node that fails any step.
        for remote_mon in remote_mons_management_ips:
            ssh_obj = ssh()
            if not ssh_obj.copy_file_to_host(
                    remote_mon, "{}".format(ceph_client_admin_keyring)):
                logger.error("Cannot copy {} to {}".format(
                    ceph_client_admin_keyring, remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, "/etc/ceph/{}.conf".format(cluster_name)):
                logger.error("Cannot copy ceph.conf to {}".format(remote_mon))
                status.success = False
            elif not ssh_obj.call_command(
                    remote_mon, " python {} ".format(
                        config_api.get_node_create_mon_script_path())):
                logger.error("Cannot create monitor on remote node {}".format(
                    remote_mon))
                status.success = False
            # Nautilius copy bootstrap-osd ourselves :
            elif not ssh_obj.call_command(
                    remote_mon, 'mkdir -p /var/lib/ceph/bootstrap-osd'):
                logger.error(
                    "Cannot create bootstrap-osd dir on remote node {}".format(
                        remote_mon))
                status.success = False
            elif not ssh_obj.copy_file_to_host(
                    remote_mon, '/var/lib/ceph/bootstrap-osd/ceph.keyring'):
                logger.error("Cannot copy bootstrap-osd keyring to {}".format(
                    remote_mon))
                status.success = False
            if not status.success:
                status.failed_tasks.append(
                    "core_cluster_deploy_monitor_create_err" + "%" +
                    remote_mon)
                return status
        if not __test_mons():
            status.success = False
            status.failed_tasks.append("core_cluster_deploy_monitors_down_err")
            return status
        # Nautilius enable msgr2 :
        call_cmd('ceph mon enable-msgr2')
    except Exception as ex:
        status.success = False
        # NOTE(review): ex.message is Python 2 only; this module targets
        # Python 2.
        logger.exception(ex.message)
        status.failed_tasks.append(
            "core_cluster_deploy_monitor_exception_occurred" + "%" +
            current_node_name)
        return status
    status.success = True
    return status
def new():
    """Write an initial <cluster-name>.conf ceph configuration file.

    Builds the [global] section (fsid, public/cluster networks, monitor
    members and hosts, auth and pool defaults) from the stored cluster
    configuration, creates the monitor keyring, writes to a temp file
    and renames it into place.
    """
    conf = configuration()
    clu = conf.get_cluster_info()
    logger.info('Creating new cluster named %s', clu.name)
    cfg = CephConf()
    cfg.add_section('global')
    fsid = uuid.uuid4()
    cfg.set('global', 'fsid', str(fsid))
    # if networks were passed in, lets set them in the global section
    cfg.set('global', 'public network',
            str(clu.backend_1_base_ip) + "/" +
            get_net_size(str(clu.backend_1_mask)))
    cfg.set('global', 'cluster network',
            str(clu.backend_2_base_ip) + "/" +
            get_net_size(str(clu.backend_2_mask)))
    mon_initial_members = []
    mon_host = []
    for i in clu.management_nodes:
        node_info = NodeInfo()
        node_info.load_json(json.dumps(i))
        mon_initial_members.append(node_info.name)
        mon_host.append(node_info.backend_1_ip)
    cfg.set('global', 'mon initial members', ', '.join(mon_initial_members))
    # no spaces here, see http://tracker.newdream.net/issues/3145
    cfg.set('global', 'mon host', ','.join(mon_host))
    # override undesirable defaults, needed until bobtail
    # http://tracker.ceph.com/issues/6788
    cfg.set('global', 'auth cluster required', 'cephx')
    cfg.set('global', 'auth service required', 'cephx')
    cfg.set('global', 'auth client required', 'cephx')
    cfg.set('global', 'mon clock drift allowed', '.300')
    cfg.set('global', 'osd pool default size', '2')
    cfg.set('global', 'max open files', '131072')
    # http://tracker.newdream.net/issues/3138
    cfg.set('global', 'filestore xattr use omap', 'true')
    path = '{name}.conf'.format(name=clu.name)
    new_mon_keyring(clu.name)
    logger.info('Writing initial config to %s...', path)
    tmp = '%s.tmp' % path
    # open() instead of the deprecated Python 2 file() builtin; the
    # unused current_node_name/config_api locals and the no-op
    # "except OSError: raise" wrapper around os.rename were removed.
    with open(tmp, 'w') as f:
        cfg.write(f)
    os.rename(tmp, path)
description= 'This is a script that will start up the configured consul cluster.') parser.add_argument('-retry-join', '--retry-join', help='', required=False) args = parser.parse_args() retry_join_arg = args.retry_join # print('args.retry_join: ', retry_join_arg) config = configuration() node = config.get_node_info() cluster_info = config.get_cluster_info() retry_join = '' for cluster_node in cluster_info.management_nodes: remote_node_info = NodeInfo() remote_node_info.load_json(json.dumps(cluster_node)) if remote_node_info.backend_1_ip != node.backend_1_ip: retry_join = retry_join + ' -retry-join ' + remote_node_info.backend_1_ip str_start_command = "consul agent -raft-protocol 2 -config-dir /opt/petasan/config/etc/consul.d/server -bind {} ".format( node.backend_1_ip) + retry_join # print('A: ', str_start_command) if retry_join_arg is not None: # if str(retry_join).find(str(retry_join_arg)) == -1: # print('str_start_command: str_start_command: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>', str_start_command) str_start_command = str_start_command + " -retry-join " + retry_join_arg logger.info('str_start_command: >>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ' +