def setup_keepalived(): """Task to provision VIP for openstack nodes with keepalived""" mgmt_ip = hstr_to_ip(env.host_string) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_password = env.passwords[env.host_string] if (getattr(env, 'openstack_admin_password', None)): openstack_admin_password = env.openstack_admin_password else: openstack_admin_password = '******' internal_vip = get_from_testbed_dict('ha', 'internal_vip', None) external_vip = get_from_testbed_dict('ha', 'external_vip', None) openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] myindex = openstack_host_list.index(self_host) if myindex >= 1: # Wait for VIP to be assiciated to MASTER with settings(host_string=env.roledefs['openstack'][0], warn_only=True): while run("ip addr | grep %s" % internal_vip).failed: sleep(2) print "Waiting for VIP to be associated to MASTER VRRP." continue with cd(INSTALLER_DIR): cmd = "PASSWORD=%s ADMIN_TOKEN=%s python setup-vnc-keepalived.py\ --self_ip %s --internal_vip %s --mgmt_self_ip %s\ --openstack_index %d --num_nodes %d" % (openstack_host_password, openstack_admin_password, self_ip, internal_vip, mgmt_ip, (openstack_host_list.index(self_host) + 1), len(env.roledefs['openstack'])) if external_vip: cmd += ' --external_vip %s' % external_vip run(cmd)
def setup_keepalived_node(role):
    """Task to provision VIP for node with keepalived"""
    mgmt_ip = hstr_to_ip(env.host_string)
    self_host = get_control_host_string(env.host_string)
    self_ip = hstr_to_ip(self_host)

    internal_vip = get_openstack_internal_vip()
    external_vip = get_openstack_external_vip()
    if role == 'cfgm':
        internal_vip = get_contrail_internal_vip()
        external_vip = get_contrail_external_vip()

    keepalived_host_list = [get_control_host_string(keepalived_host)
                            for keepalived_host in env.roledefs[role]]
    myindex = keepalived_host_list.index(self_host)
    if myindex >= 1:
        # Wait for VIP to be associated to MASTER
        with settings(host_string=env.roledefs[role][0], warn_only=True):
            while sudo("ip addr | grep %s" % internal_vip).failed:
                sleep(2)
                print "Waiting for VIP to be associated to MASTER VRRP."
                continue

    with cd(INSTALLER_DIR):
        cmd = "setup-vnc-keepalived\
            --self_ip %s --internal_vip %s --mgmt_self_ip %s\
            --self_index %d --num_nodes %d --role %s" % (
                self_ip, internal_vip, mgmt_ip,
                (keepalived_host_list.index(self_host) + 1),
                len(env.roledefs[role]), role)
        if external_vip:
            cmd += ' --external_vip %s' % external_vip
        sudo(cmd)
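# The keepalived tasks above share the same "wait until the MASTER node owns
# the VIP" probe. Below is a minimal sketch of that pattern as a standalone
# helper; the helper name and the max_wait bound are illustrative assumptions,
# while the Fabric 1.x calls (settings, sudo) and the `ip addr | grep <vip>`
# check come from the tasks themselves.
from time import sleep
from fabric.api import settings, sudo

def wait_for_vip_on_master(master_host_string, internal_vip, max_wait=600):
    """Poll the MASTER VRRP node until the internal VIP appears on one of its
    interfaces, so that backup nodes do not provision keepalived too early."""
    waited = 0
    with settings(host_string=master_host_string, warn_only=True):
        while sudo("ip addr | grep %s" % internal_vip).failed:
            sleep(2)
            waited += 2
            if waited >= max_wait:
                raise RuntimeError("VIP %s not seen on %s after %d seconds"
                                   % (internal_vip, master_host_string, max_wait))
            print "Waiting for VIP to be associated to MASTER VRRP."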
def setup_keepalived(): """Task to provision VIP for openstack nodes with keepalived""" mgmt_ip = hstr_to_ip(env.host_string) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_password = env.passwords[env.host_string] if (getattr(env, 'openstack_admin_password', None)): openstack_admin_password = env.openstack_admin_password else: openstack_admin_password = '******' internal_vip = get_from_testbed_dict('ha', 'internal_vip', None) external_vip = get_from_testbed_dict('ha', 'external_vip', None) openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] with cd(INSTALLER_DIR): cmd = "PASSWORD=%s ADMIN_TOKEN=%s python setup-vnc-keepalived.py\ --self_ip %s --internal_vip %s --mgmt_self_ip %s\ --openstack_index %d" % (openstack_host_password, openstack_admin_password, self_ip, internal_vip, mgmt_ip, (openstack_host_list.index(self_host) + 1)) if external_vip: cmd += ' --external_vip %s' % external_vip run(cmd)
def setup_galera_cluster(): """Task to cluster the openstack nodes with galera""" if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping galera cluster setup." return if env.roledefs['openstack'].index(env.host_string) == 0: execute('setup_passwordless_ssh', *env.roledefs['openstack']) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_password = env.passwords[env.host_string] if (getattr(env, 'openstack_admin_password', None)): openstack_admin_password = env.openstack_admin_password else: openstack_admin_password = '******' openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host)\ for galera_host in openstack_host_list] keystone_ip = get_keystone_ip() internal_vip = get_from_testbed_dict('ha', 'internal_vip', None) with cd(INSTALLER_DIR): run("PASSWORD=%s ADMIN_TOKEN=%s python setup-vnc-galera.py\ --self_ip %s --keystone_ip %s --galera_ip_list %s\ --internal_vip %s --openstack_index %d" % (openstack_host_password, openstack_admin_password, self_ip, keystone_ip, ' '.join(galera_ip_list), internal_vip, (openstack_host_list.index(self_host) + 1)))
def setup_keepalived(): """Task to provision VIP for openstack nodes with keepalived""" mgmt_ip = hstr_to_ip(env.host_string) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_password = env.passwords[env.host_string] if (getattr(env, 'openstack_admin_password', None)): openstack_admin_password = env.openstack_admin_password else: openstack_admin_password = '******' internal_vip = get_from_testbed_dict('ha', 'internal_vip', None) external_vip = get_from_testbed_dict('ha', 'external_vip', None) openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] with cd(INSTALLER_DIR): cmd = "PASSWORD=%s ADMIN_TOKEN=%s python setup-vnc-keepalived.py\ --self_ip %s --internal_vip %s --mgmt_self_ip %s\ --openstack_index %d" % ( openstack_host_password, openstack_admin_password, self_ip, internal_vip, mgmt_ip, (openstack_host_list.index(self_host) + 1)) if external_vip: cmd += ' --external_vip %s' % external_vip run(cmd)
def purge_node_from_openstack_cluster(del_openstack_node): if ping_test(del_openstack_node): # If CMON is running in the node to be purged, stop it. # Invalidate the config. with settings(host_string=del_openstack_node, warn_only=True): sudo("service contrail-hamon stop") sudo("service cmon stop") sudo("chkconfig contrail-hamon off") sudo("mv /etc/cmon.cnf /etc/cmon.cnf.removed") del_openstack_node_ip = hstr_to_ip(del_openstack_node) del_openstack_ctrl_ip = hstr_to_ip(get_control_host_string(del_openstack_node)) execute('fixup_restart_haproxy_in_openstack') execute("restart_openstack") execute('remove_node_from_galera', del_openstack_node_ip) execute('fix_cmon_param_and_add_keys_to_compute') with settings(host_string = env.roledefs['openstack'][0]): sudo("unregister-openstack-services --node_to_unregister %s" % del_openstack_ctrl_ip) if ping_test(del_openstack_node): with settings(host_string=del_openstack_node, warn_only=True): sudo("service mysql stop") sudo("service supervisor-openstack stop") sudo("chkconfig supervisor-openstack off")
def setup_keystone_ssl_certs_node(*nodes):
    default_certfile = '/etc/keystone/ssl/certs/keystone.pem'
    default_keyfile = '/etc/keystone/ssl/private/keystone.key'
    default_cafile = '/etc/keystone/ssl/certs/keystone_ca.pem'
    keystonecertbundle = get_keystone_cert_bundle()
    ssl_certs = ((get_keystone_certfile(), default_certfile),
                 (get_keystone_keyfile(), default_keyfile),
                 (get_keystone_cafile(), default_cafile))
    index = env.roledefs['openstack'].index(env.host_string) + 1
    for node in nodes:
        with settings(host_string=node, password=get_env_passwords(node)):
            for ssl_cert, default in ssl_certs:
                if ssl_cert == default:
                    # Clear old certificate
                    sudo('rm -f %s' % ssl_cert)
                    sudo('rm -f %s' % keystonecertbundle)
            for ssl_cert, default in ssl_certs:
                if ssl_cert == default:
                    openstack_host = env.roledefs['openstack'][0]
                    if index == 1:
                        if not exists(ssl_cert, use_sudo=True):
                            print "Creating keystone SSL certs in first openstack node"
                            subject_alt_names_mgmt = [hstr_to_ip(host)
                                                      for host in env.roledefs['openstack']]
                            subject_alt_names_ctrl = [hstr_to_ip(get_control_host_string(host))
                                                      for host in env.roledefs['openstack']]
                            subject_alt_names = subject_alt_names_mgmt + subject_alt_names_ctrl
                            if get_openstack_external_vip():
                                subject_alt_names.append(get_openstack_external_vip())
                            sudo('create-keystone-ssl-certs.sh %s %s' % (
                                get_openstack_internal_vip() or hstr_to_ip(get_control_host_string(openstack_host)),
                                ','.join(subject_alt_names)))
                    else:
                        with settings(host_string=openstack_host,
                                      password=get_env_passwords(openstack_host)):
                            while not exists(ssl_cert, use_sudo=True):
                                print "Wait for SSL certs to be created in first openstack"
                                sleep(0.1)
                            print "Get SSL cert(%s) from first openstack" % ssl_cert
                            tmp_dir = tempfile.mkdtemp()
                            tmp_fname = os.path.join(tmp_dir, os.path.basename(ssl_cert))
                            get_as_sudo(ssl_cert, tmp_fname)
                        print "Copy to this(%s) openstack node" % env.host_string
                        sudo('mkdir -p /etc/keystone/ssl/certs/')
                        sudo('mkdir -p /etc/keystone/ssl/private/')
                        put(tmp_fname, ssl_cert, use_sudo=True)
                        os.remove(tmp_fname)
                elif os.path.isfile(ssl_cert):
                    print "Certificate (%s) exists locally" % ssl_cert
                    put(ssl_cert, default, use_sudo=True)
                elif exists(ssl_cert, use_sudo=True):
                    print "Certificate (%s) exists in openstack node" % ssl_cert
                else:
                    raise RuntimeError("%s doesn't exist locally or in openstack node" % ssl_cert)
            if not exists(keystonecertbundle, use_sudo=True):
                ((certfile, _), (keyfile, _), (cafile, _)) = ssl_certs
                sudo('cat %s %s > %s' % (certfile, cafile, keystonecertbundle))
            sudo("chown -R keystone:keystone /etc/keystone/ssl")
def remove_node_from_galera(del_galera_node): """Task to remove a node from the galera cluster. Removes config from other Galera nodes """ if len(env.roledefs['openstack']) < 3: raise RuntimeError("Galera cluster needs of quorum of at least 3 nodes! Cannot remove the node from cluster") self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host)\ for galera_host in openstack_host_list] authserver_ip = get_authserver_ip() internal_vip = get_openstack_internal_vip() external_vip = get_openstack_external_vip() zoo_ip_list = [hstr_to_ip(get_control_host_string(\ cassandra_host)) for cassandra_host in env.roledefs['database']] with cd(INSTALLER_DIR): cmd = "remove-galera-node\ --self_ip %s --node_to_del %s --keystone_ip %s --galera_ip_list %s\ --internal_vip %s --openstack_index %d --zoo_ip_list %s" % (self_ip, del_galera_node, authserver_ip, ' '.join(galera_ip_list), internal_vip, (openstack_host_list.index(self_host) + 1), ' '.join(zoo_ip_list)) if external_vip: cmd += ' --external_vip %s' % external_vip sudo(cmd)
def setup_galera_cluster(): """Task to cluster the openstack nodes with galera""" if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping galera cluster setup." return if env.roledefs['openstack'].index(env.host_string) == 0: execute('setup_passwordless_ssh', *env.roledefs['openstack']) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_password = env.passwords[env.host_string] if (getattr(env, 'openstack_admin_password', None)): openstack_admin_password = env.openstack_admin_password else: openstack_admin_password = '******' openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host)\ for galera_host in openstack_host_list] keystone_ip = get_keystone_ip() internal_vip = get_openstack_internal_vip() with cd(INSTALLER_DIR): run("PASSWORD=%s ADMIN_TOKEN=%s python setup-vnc-galera.py\ --self_ip %s --keystone_ip %s --galera_ip_list %s\ --internal_vip %s --openstack_index %d" % (openstack_host_password, openstack_admin_password, self_ip, keystone_ip, ' '.join(galera_ip_list), internal_vip, (openstack_host_list.index(self_host) + 1)))
def fixup_restart_haproxy_in_collector_node(*args): contrail_analytics_api_server_lines = '' space = ' ' * 3 for host_string in env.roledefs['collector']: server_index = env.roledefs['collector'].index(host_string) + 1 mgmt_host_ip = hstr_to_ip(host_string) host_ip = hstr_to_ip(get_control_host_string(host_string)) contrail_analytics_api_server_lines +=\ '%s server %s %s:9081 check inter 2000 rise 2 fall 3\n'\ % (space, host_ip, host_ip) for host_string in env.roledefs['collector']: haproxy_config = collector_haproxy.template.safe_substitute({ '__contrail_analytics_api_backend_servers__': contrail_analytics_api_server_lines, '__contrail_hap_user__': 'haproxy', '__contrail_hap_passwd__': 'contrail123', }) for host_string in args: with settings(host_string=host_string): # chop old settings including pesky default from pkg... tmp_fname = "/tmp/haproxy-%s-config" % (host_string) get_as_sudo("/etc/haproxy/haproxy.cfg", tmp_fname) with settings(warn_only=True): local( "sed -i -e '/^#contrail-collector-marker-start/,/^#contrail-collector-marker-end/d' %s" % (tmp_fname)) local( "sed -i -e 's/frontend\s*main\s*\*:5000/frontend main *:5001/' %s" % (tmp_fname)) local( "sed -i -e 's/ssl-relay 0.0.0.0:8443/ssl-relay 0.0.0.0:5002/' %s" % (tmp_fname)) local( "sed -i -e 's/option\shttplog/option tcplog/' %s" % (tmp_fname)) local("sed -i -e 's/maxconn 4096/maxconn 100000/' %s" % (tmp_fname)) # Remove default HA config local("sed -i '/listen\sappli1-rewrite/,/rspidel/d' %s" % tmp_fname) local("sed -i '/listen\sappli3-relais/,/rspidel/d' %s" % tmp_fname) # ...generate new ones cfg_file = open(tmp_fname, 'a') cfg_file.write(haproxy_config) cfg_file.close() put(tmp_fname, "/etc/haproxy/haproxy.cfg", use_sudo=True) local("rm %s" % (tmp_fname)) # haproxy enable with settings(host_string=host_string, warn_only=True): sudo("chkconfig haproxy on") enable_haproxy() sudo("service haproxy restart")
def fixup_restart_haproxy_in_collector_node(*args): contrail_analytics_api_server_lines = "" space = " " * 3 for host_string in env.roledefs["collector"]: server_index = env.roledefs["collector"].index(host_string) + 1 mgmt_host_ip = hstr_to_ip(host_string) host_ip = hstr_to_ip(get_control_host_string(host_string)) contrail_analytics_api_server_lines += "%s server %s %s:9081 check inter 2000 rise 2 fall 3\n" % ( space, host_ip, host_ip, ) for host_string in env.roledefs["collector"]: haproxy_config = collector_haproxy.template.safe_substitute( { "__contrail_analytics_api_backend_servers__": contrail_analytics_api_server_lines, "__contrail_hap_user__": "haproxy", "__contrail_hap_passwd__": "contrail123", } ) for host_string in args: with settings(host_string=host_string): # chop old settings including pesky default from pkg... tmp_fname = "/tmp/haproxy-%s-config" % (host_string) get_as_sudo("/etc/haproxy/haproxy.cfg", tmp_fname) with settings(warn_only=True): local( "sed -i -e '/^#contrail-collector-marker-start/,/^#contrail-collector-marker-end/d' %s" % (tmp_fname) ) local("sed -i -e 's/frontend\s*main\s*\*:5000/frontend main *:5001/' %s" % (tmp_fname)) local("sed -i -e 's/ssl-relay 0.0.0.0:8443/ssl-relay 0.0.0.0:5002/' %s" % (tmp_fname)) local("sed -i -e 's/option\shttplog/option tcplog/' %s" % (tmp_fname)) local("sed -i -e 's/maxconn 4096/maxconn 100000/' %s" % (tmp_fname)) # Remove default HA config local("sed -i '/listen\sappli1-rewrite/,/rspidel/d' %s" % tmp_fname) local("sed -i '/listen\sappli3-relais/,/rspidel/d' %s" % tmp_fname) # ...generate new ones cfg_file = open(tmp_fname, "a") cfg_file.write(haproxy_config) cfg_file.close() put(tmp_fname, "/etc/haproxy/haproxy.cfg", use_sudo=True) local("rm %s" % (tmp_fname)) # haproxy enable with settings(host_string=host_string, warn_only=True): sudo("chkconfig haproxy on") enable_haproxy() sudo("service haproxy restart")
def setup_galera_cluster(): """Task to cluster the openstack nodes with galera""" if len(env.roledefs["openstack"]) <= 1: print "Single Openstack cluster, skipping galera cluster setup." return if env.roledefs["openstack"].index(env.host_string) == 0: execute("setup_passwordless_ssh", *env.roledefs["openstack"]) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_list = [get_control_host_string(openstack_host) for openstack_host in env.roledefs["openstack"]] galera_ip_list = [hstr_to_ip(galera_host) for galera_host in openstack_host_list] authserver_ip = get_authserver_ip() internal_vip = get_openstack_internal_vip() external_vip = get_openstack_external_vip() zoo_ip_list = [hstr_to_ip(get_control_host_string(cassandra_host)) for cassandra_host in env.roledefs["database"]] monitor_galera = "False" if get_openstack_internal_vip(): monitor_galera = "True" cmon_db_user = "******" cmon_db_pass = "******" keystone_db_user = "******" keystone_db_pass = "******" with cd(INSTALLER_DIR): cmd = ( "setup-vnc-galera\ --self_ip %s --keystone_ip %s --galera_ip_list %s\ --internal_vip %s --openstack_index %d --zoo_ip_list %s --keystone_user %s\ --keystone_pass %s --cmon_user %s --cmon_pass %s --monitor_galera %s" % ( self_ip, keystone_ip, " ".join(galera_ip_list), internal_vip, (openstack_host_list.index(self_host) + 1), " ".join(zoo_ip_list), keystone_db_user, keystone_db_pass, cmon_db_user, cmon_db_pass, monitor_galera, ) ) if external_vip: cmd += " --external_vip %s" % external_vip sudo(cmd)
def get_data_ip(host_str): tgt_ip = None tgt_gw= None data_ip_info = getattr(testbed, 'control_data', None) if data_ip_info: if host_str in data_ip_info.keys(): tgt_ip = str(IPNetwork(data_ip_info[host_str]['ip']).ip) tgt_gw = data_ip_info[host_str]['gw'] else: tgt_ip = hstr_to_ip(host_str) else: tgt_ip = hstr_to_ip(host_str) return (tgt_ip, tgt_gw)
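# Hedged usage sketch for get_data_ip() above (the host strings and addresses
# are made up for illustration; `control_data` is the optional per-host map in
# testbed.py that the function reads). With netaddr's IPNetwork,
# str(IPNetwork('192.168.10.10/24').ip) evaluates to '192.168.10.10'.
#
#   testbed.control_data = {
#       'root@10.1.1.10': {'ip': '192.168.10.10/24', 'gw': '192.168.10.1'},
#   }
#   get_data_ip('root@10.1.1.10')  # -> ('192.168.10.10', '192.168.10.1')
#   get_data_ip('root@10.1.1.99')  # -> (hstr_to_ip('root@10.1.1.99'), None)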
def detach_vrouter_node(*args):
    """Detaches one/more compute node from the existing cluster."""
    cfgm_host = get_control_host_string(env.roledefs['cfgm'][0])
    cfgm_host_password = env.passwords[env.roledefs['cfgm'][0]]
    cfgm_ip = hstr_to_ip(cfgm_host)
    for host_string in args:
        compute_hostname = socket.gethostbyaddr(hstr_to_ip(host_string))[0].split('.')[0]
        with settings(host_string=host_string, warn_only=True):
            run("service supervisor-vrouter stop")
        with settings(host_string=cfgm_host, password=cfgm_host_password):
            run("python /opt/contrail/utils/provision_vrouter.py --host_name %s --host_ip %s --api_server_ip %s --oper del" %
                (compute_hostname, host_string.split('@')[1], cfgm_ip))
    execute("restart_control")
def detach_vrouter_node(*args):
    """Detaches one/more compute node from the existing cluster."""
    cfgm_host = get_control_host_string(env.roledefs['cfgm'][0])
    cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0])
    cfgm_ip = hstr_to_ip(cfgm_host)
    nova_compute = "openstack-nova-compute"

    for host_string in args:
        with settings(host_string=host_string, warn_only=True):
            sudo("service supervisor-vrouter stop")
            if detect_ostype() in ['ubuntu']:
                nova_compute = "nova-compute"
            mode = get_mode(host_string)
            if (mode == 'vcenter'):
                nova_compute = ""
            if (nova_compute != ""):
                sudo("service %s stop" % nova_compute)
            compute_hostname = sudo("hostname")
        with settings(host_string=env.roledefs['cfgm'][0], password=cfgm_host_password):
            sudo("python /opt/contrail/utils/provision_vrouter.py --host_name %s --host_ip %s --api_server_ip %s --oper del %s" %
                 (compute_hostname, host_string.split('@')[1], cfgm_ip, get_mt_opts()))
    execute("restart_control")
def fix_restart_xinetd_conf_node(*args): """Fix contrail-mysqlprobe to accept connection only from this node, USAGE:fab fix_restart_xinetd_conf_node:[email protected],[email protected]""" for host_string in args: self_ip = hstr_to_ip(get_control_host_string(host_string)) run("sed -i -e 's#only_from = 0.0.0.0/0#only_from = %s 127.0.0.1#' /etc/xinetd.d/contrail-mysqlprobe" % self_ip) run("service xinetd restart") run("chkconfig xinetd on")
def drop_analytics_keyspace_node(*args): for host_string in args: with settings(host_string=host_string, warn_only=True): CASSANDRA_CMD = 'cqlsh %s -f ' % hstr_to_ip( get_control_host_string(host_string)) print "Dropping analytics keyspace.. this may take a while.." sudo( "echo 'describe keyspace \"ContrailAnalytics\";' > /tmp/cassandra_commands_file" ) if sudo(CASSANDRA_CMD + '/tmp/cassandra_commands_file').succeeded: sudo( "echo 'drop keyspace \"ContrailAnalytics\";' > /tmp/cassandra_commands_file" ) if not sudo(CASSANDRA_CMD + '/tmp/cassandra_commands_file').succeeded: print "WARN: Drop keyspace ContrailAnalytics failed.." else: print "INFO: keyspace ContrailAnalytics is dropped.." print "INFO: if snapshots are created, manual deletion may be required to free up disk.." sudo( "echo 'drop keyspace \"ContrailAnalyticsCql\";' > /tmp/cassandra_commands_file" ) if not sudo(CASSANDRA_CMD + '/tmp/cassandra_commands_file').succeeded: print "WARN: Drop keyspace ContrailAnalyticsCql failed.." else: print "INFO: keyspace ContrailAnalyticsCql is dropped.." print "INFO: if snapshots are created, manual deletion may be required to free up disk.."
def verfiy_and_update_hosts(host_name, host_string): # Need to have the alias created to map to the hostname # this is required for erlang node to cluster using # the same interface that is used for rabbitMQ TCP listener with settings(hide('stderr'), warn_only=True): if sudo('grep %s /etc/hosts' % (host_name+ctrl)).failed: sudo("echo '%s %s %s' >> /etc/hosts" % (hstr_to_ip(get_control_host_string(host_string)), host_name, host_name+ctrl))
def setup_cmon_param_zkonupgrade_node(*args): if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping cmon zookeeper setup." return for host_string in args: cmon_param = '/etc/contrail/ha/cmon_param' zoo_ip_list = [hstr_to_ip(get_control_host_string(\ cassandra_host)) for cassandra_host in env.roledefs['database']] zk_servers_ports = ','.join(['%s:2181' %(s) for s in zoo_ip_list]) zks = 'ZK_SERVER_IP="%s"' % (zk_servers_ports) monitor_galera="False" if get_contrail_internal_vip(): monitor_galera="True" # Assuming that keystone is the user and pass # if changed we need to fetch and update these fields keystone_db_user="******" keystone_db_pass="******" cmon_db_user="******" cmon_db_pass="******" sudo("grep -q 'ZK_SERVER_IP' %s || echo '%s' >> %s" % (cmon_param, zks, cmon_param)) sudo("grep -q 'OS_KS_USER' %s || echo 'OS_KS_USER=%s' >> %s" % (cmon_param, keystone_db_user, cmon_param)) sudo("grep -q 'OS_KS_PASS' %s || echo 'OS_KS_PASS=%s' >> %s" % (cmon_param, keystone_db_pass, cmon_param)) sudo("grep -q 'CMON_USER' %s || echo 'CMON_USER=%s' >> %s" % (cmon_param, cmon_db_user, cmon_param)) sudo("grep -q 'CMON_PASS' %s || echo 'CMON_PASS=%s' >> %s" % (cmon_param, cmon_db_pass, cmon_param)) sudo("grep -q 'MONITOR_GALERA' %s || echo 'MONITOR_GALERA=%s' >> %s" % (cmon_param, monitor_galera, cmon_param))
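# A small sketch (not in the original file) that generalizes the idempotent
# "grep -q KEY file || echo 'KEY=value' >> file" pattern repeated above for
# each cmon_param entry. The helper name and signature are assumptions; it
# only relies on Fabric's sudo(), which the surrounding tasks already use.
from fabric.api import sudo

def append_param_if_missing(conf_file, key, value):
    """Append KEY=value to conf_file only when KEY is not already present."""
    sudo("grep -q '%s' %s || echo '%s=%s' >> %s"
         % (key, conf_file, key, value, conf_file))

# e.g. append_param_if_missing('/etc/contrail/ha/cmon_param',
#                              'MONITOR_GALERA', monitor_galera)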
def purge_node_from_database(del_db_node):
    del_db_ctrl_ip = hstr_to_ip(get_control_host_string(del_db_node))
    with settings(host_string=env.roledefs['database'][0], warn_only=True):
        is_part_of_db = local('nodetool status | grep %s' % del_db_ctrl_ip).succeeded
        if not is_part_of_db:
            print "Node %s is not part of DB Cluster" % del_db_node
            return
        is_seed = local('grep "\- seeds: " /etc/cassandra/cassandra.yaml | grep %s' % del_db_ctrl_ip).succeeded
        is_alive = local('nodetool status | grep %s | grep "UN"' % del_db_ctrl_ip).succeeded

    if is_seed:
        # If the node to be removed is a seed node, then we need to re-establish other nodes
        # as seed node before removing this node.
        print "Removing the seed node %s from DB Cluster and re-electing new seed nodes" % del_db_ctrl_ip
        for db in env.roledefs['database']:
            with settings(host_string=db):
                cmd = frame_vnc_database_cmd(db, cmd='readjust-cassandra-seed-list')
                sudo(cmd)

    if is_alive and ping_test(del_db_node):
        # Node is active in the cluster. The tokens need to be redistributed before the
        # node can be brought down.
        with settings(host_string=del_db_node):
            cmd = frame_vnc_database_cmd(del_db_node, cmd='decommission-cassandra-node')
            sudo(cmd)
    else:
        # Node is part of the cluster but not active. Hence, remove the node
        # from the cluster
        with settings(host_string=env.roledefs['database'][0]):
            cmd = frame_vnc_database_cmd(del_db_node, cmd='remove-cassandra-node')
            sudo(cmd)
def setup_vtb(): """Setup virtual testbed with inputs from testbed.py.""" vm_hostname = hstr_to_ip(env.host_string) compute_host = env.vms[env.host_string]['compute'] # build_template try: distro = env.ostypes[env.host_string] vm = importlib.import_module( 'fabfile.contraillabs.vtb.templates.%s_vm_template' % distro) except ImportError: vm = importlib.import_module('templates.%s_vm_template' % distro) vm_config = vm.template.safe_substitute({ '__name__' : vm_hostname, '__host_interface__' : get_compute_host_intf(compute_host), '__bridge_interface__' : 'br0', '__ipaddress__' : env.vms[env.host_string]['private_ip'], }) vm_ini_file = '%s.ini' % vm_hostname with open(vm_ini_file, 'w+') as fd: fd.write(vm_config) vmi = execute('create_vm', compute_host, vm_ini_file)['<local-only>'] if 'eth1' in vmi.keys(): testbed = 'fabfile/testbeds/testbed.py' local("sed -i 's/%s/root@%s/g' %s" % (env.host_string, vmi['eth0'], testbed))
def fix_restart_xinetd_conf_node(*args): """Fix contrail-mysqlprobe to accept connection only from this node, USAGE:fab fix_restart_xinetd_conf_node:[email protected],[email protected]""" for host_string in args: self_ip = hstr_to_ip(get_control_host_string(host_string)) sudo("sed -i -e 's#only_from = 0.0.0.0/0#only_from = %s 127.0.0.1#' /etc/xinetd.d/contrail-mysqlprobe" % self_ip) sudo("service xinetd restart") sudo("chkconfig xinetd on")
def config_rabbitmq(): rabbit_hosts = [] rabbit_conf = '/etc/rabbitmq/rabbitmq.config' if len(env.roledefs['rabbit']) <= 1 and detect_ostype() == 'redhat': print "CONFIG_RABBITMQ: Skip creating rabbitmq.config for Single node setup" return for host_string in env.roledefs['rabbit']: with settings(host_string=host_string, password=env.passwords[host_string]): host_name = sudo('hostname -s') + ctrl rabbit_hosts.append("\'rabbit@%s\'" % host_name) rabbit_hosts = ', '.join(rabbit_hosts) rabbitmq_config_template = rabbitmq_config if len(env.roledefs['rabbit']) == 1: rabbitmq_config_template = rabbitmq_config_single_node rabbitmq_configs = rabbitmq_config_template.template.safe_substitute({ '__control_intf_ip__': hstr_to_ip(get_control_host_string(env.host_string)), '__rabbit_hosts__': rabbit_hosts, }) tmp_fname = "/tmp/rabbitmq_%s.config" % env.host_string cfg_file = open(tmp_fname, 'w') cfg_file.write(rabbitmq_configs) cfg_file.close() put(tmp_fname, "/etc/rabbitmq/rabbitmq.config", use_sudo=True) local("rm %s" % (tmp_fname))
def drop_analytics_keyspace_node(*args): for host_string in args: with settings(host_string=host_string): CASSANDRA_CMD = 'cqlsh %s 9160 -f ' % hstr_to_ip(host_string) print "Dropping analytics keyspace.. this may take a while.." sudo("echo 'drop keyspace \"ContrailAnalytics\";' > /tmp/cassandra_commands_file") if not sudo(CASSANDRA_CMD + '/tmp/cassandra_commands_file').succeeded: print "WARN: Drop analytics keyspace failed.."
def mount_glance_images(): nfs_server = get_from_testbed_dict('ha', 'nfs_server', hstr_to_ip(env.roledefs['compute'][0])) nfs_glance_path = get_from_testbed_dict('ha', 'nfs_glance_path', '/var/tmp/glance-images/') with settings(warn_only=True): out = run('sudo mount %s:%s /var/lib/glance/images' % (nfs_server, nfs_glance_path)) if out.failed and 'already mounted' not in out: raise RuntimeError(out) if run('grep "%s:%s /var/lib/glance/images nfs" /etc/fstab' % (nfs_server, nfs_glance_path)).failed: run('echo "%s:%s /var/lib/glance/images nfs nfsvers=3,hard,intr,auto 0 0" >> /etc/fstab' % (nfs_server, nfs_glance_path))
def detach_vrouter_node(*args):
    """Detaches one/more compute node from the existing cluster."""
    cfgm_host = get_control_host_string(env.roledefs['cfgm'][0])
    cfgm_host_password = get_env_passwords(env.roledefs['cfgm'][0])
    cfgm_ip = hstr_to_ip(cfgm_host)
    nova_compute = "openstack-nova-compute"
    if detect_ostype() in ['ubuntu']:
        nova_compute = "nova-compute"
    for host_string in args:
        compute_hostname = socket.gethostbyaddr(hstr_to_ip(host_string))[0].split('.')[0]
        with settings(host_string=host_string, warn_only=True):
            sudo("service supervisor-vrouter stop")
            sudo("service %s stop" % nova_compute)
        with settings(host_string=cfgm_host, password=cfgm_host_password):
            sudo("python /opt/contrail/utils/provision_vrouter.py --host_name %s --host_ip %s --api_server_ip %s --oper del %s" %
                 (compute_hostname, host_string.split('@')[1], cfgm_ip, get_mt_opts()))
    execute("restart_control")
def fix_wsrep_cluster_address(): openstack_host_list = [get_control_host_string(openstack_host) for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host) for galera_host in openstack_host_list] with settings(host_string=env.roledefs['openstack'][0], password=get_env_passwords(env.roledefs['openstack'][0])): wsrep_conf = '/etc/mysql/my.cnf' if detect_ostype() in ['ubuntu']: wsrep_conf = '/etc/mysql/conf.d/wsrep.cnf' sudo('sed -ibak "s#wsrep_cluster_address=.*#wsrep_cluster_address=gcomm://%s:4567#g" %s' % (':4567,'.join(galera_ip_list), wsrep_conf))
def fix_wsrep_cluster_address(): openstack_host_list = [get_control_host_string(openstack_host) for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host) for galera_host in openstack_host_list] with settings(host_string=env.roledefs['openstack'][0], password=env.passwords[env.roledefs['openstack'][0]]): wsrep_conf = '/etc/mysql/my.cnf' if detect_ostype() in ['Ubuntu']: wsrep_conf = '/etc/mysql/conf.d/wsrep.cnf' run('sed -ibak "s#wsrep_cluster_address=.*#wsrep_cluster_address=gcomm://%s:4567#g" %s' % (':4567,'.join(galera_ip_list), wsrep_conf))
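# Illustrative note for fix_wsrep_cluster_address() above (IPs are hypothetical):
# the sed replacement builds the gcomm URL by joining the galera IPs with
# ':4567,' and letting the trailing ':4567' in the pattern close the last entry.
#
#   galera_ip_list = ['192.168.1.11', '192.168.1.12', '192.168.1.13']
#   'gcomm://%s:4567' % ':4567,'.join(galera_ip_list)
#   # -> 'gcomm://192.168.1.11:4567,192.168.1.12:4567,192.168.1.13:4567'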
def mount_glance_images(): nfs_server = get_from_testbed_dict('ha', 'nfs_server', hstr_to_ip(get_nfs_server())) nfs_glance_path = get_from_testbed_dict('ha', 'nfs_glance_path', '/var/tmp/glance-images/') with settings(warn_only=True): out = sudo('sudo mount %s:%s /var/lib/glance/images' % (nfs_server, nfs_glance_path)) if out.failed and 'already mounted' not in out: raise RuntimeError(out) if sudo('grep "%s:%s /var/lib/glance/images nfs" /etc/fstab' % (nfs_server, nfs_glance_path)).failed: sudo('echo "%s:%s /var/lib/glance/images nfs nfsvers=3,hard,intr,auto 0 0" >> /etc/fstab' % (nfs_server, nfs_glance_path))
def purge_node_from_rabbitmq_cluster(del_rabbitmq_node, role): if get_from_testbed_dict('openstack', 'manage_amqp', 'no') == 'no' and\ role == 'openstack': # We are not managing the RabbitMQ server. No-op. return if get_contrail_internal_vip() != get_openstack_internal_vip() and\ role == 'cfgm': # Openstack and Contrail are in two different nodes. Cfgm # rabbitmq will point to the Openstack node. No-op. return env.roledefs['rabbit'] = env.roledefs[role] del_rabbitmq_ip = hstr_to_ip(del_rabbitmq_node) del_rabbitmq_ctrl_ip = hstr_to_ip(get_control_host_string(del_rabbitmq_node)) if ping_test(del_rabbitmq_node): with settings(host_string = del_rabbitmq_node, warn_only = True): sudo("rabbitmqctl stop_app") sudo("rabbitmqctl reset") sudo("service supervisor-support-service stop") sudo("mv /var/lib/rabbitmq/.erlang.cookie /var/lib/rabbitmq/.erlang.cookie.removed") sudo("mv /etc/rabbitmq/rabbitmq.config /etc/rabbitmq/rabbitmq.config.removed") else: # If the node is not reachable, then delete the node remotely from one # of the nodes in the cluster. with settings(host_string = env.roledefs['rabbit'][0], warn_only = True): hostname = local('getent hosts %s | awk \'{print $3\'}' % del_rabbitmq_ctrl_ip, capture = True) sudo("rabbitmqctl forget_cluster_node rabbit@%s" % hostname) # Giving some time for the other nodes to re-adjust the cluster, time.sleep(30) execute(config_rabbitmq) for host_string in env.roledefs[role]: with settings(host_string = host_string): sudo("service rabbitmq-server restart") # Give time for RabbitMQ to recluster time.sleep(30) result = execute(verify_cluster_status) if False in result.values(): print "Unable to recluster RabbitMQ cluster after removing the node %s" % del_rabbitmq_node exit(1)
def detach_vrouter_node(*args):
    """Detaches one/more compute node from the existing cluster."""
    cfgm_host = get_control_host_string(env.roledefs['cfgm'][0])
    cfgm_host_password = env.passwords[env.roledefs['cfgm'][0]]
    cfgm_ip = hstr_to_ip(cfgm_host)
    nova_compute = "openstack-nova-compute"
    if detect_ostype() in ['ubuntu']:
        nova_compute = "nova-compute"
    for host_string in args:
        compute_hostname = socket.gethostbyaddr(
            hstr_to_ip(host_string))[0].split('.')[0]
        with settings(host_string=host_string, warn_only=True):
            sudo("service supervisor-vrouter stop")
            sudo("service %s stop" % nova_compute)
        with settings(host_string=cfgm_host, password=cfgm_host_password):
            sudo(
                "python /opt/contrail/utils/provision_vrouter.py --host_name %s --host_ip %s --api_server_ip %s --oper del" %
                (compute_hostname, host_string.split('@')[1], cfgm_ip))
    execute("restart_control")
def fixup_restart_haproxy_in_collector_node(*args): contrail_analytics_api_server_lines = '' space = ' ' * 3 for host_string in env.roledefs['collector']: server_index = env.roledefs['collector'].index(host_string) + 1 mgmt_host_ip = hstr_to_ip(host_string) host_ip = hstr_to_ip(get_control_host_string(host_string)) contrail_analytics_api_server_lines +=\ '%s server %s %s:9081 check inter 2000 rise 2 fall 3\n'\ % (space, host_ip, host_ip) for host_string in env.roledefs['collector']: haproxy_config = collector_haproxy.template.safe_substitute({ '__contrail_analytics_api_backend_servers__' : contrail_analytics_api_server_lines, '__contrail_hap_user__': 'haproxy', '__contrail_hap_passwd__': 'contrail123', }) for host_string in args: with settings(host_string=host_string): # chop old settings including pesky default from pkg... tmp_fname = "/tmp/haproxy-%s-config" % (host_string) get("/etc/haproxy/haproxy.cfg", tmp_fname) with settings(warn_only=True): local("sed -i -e '/^#contrail-collector-marker-start/,/^#contrail-collector-marker-end/d' %s" % (tmp_fname)) local("sed -i -e 's/ssl-relay 0.0.0.0:8443/ssl-relay 0.0.0.0:5002/' %s" % (tmp_fname)) local("sed -i -e 's/option\shttplog/option tcplog/' %s" % (tmp_fname)) local("sed -i -e 's/maxconn 4096/maxconn 100000/' %s" % (tmp_fname)) # ...generate new ones cfg_file = open(tmp_fname, 'a') cfg_file.write(haproxy_config) cfg_file.close() put(tmp_fname, "/etc/haproxy/haproxy.cfg") local("rm %s" %(tmp_fname)) # haproxy enable with settings(host_string=host_string, warn_only=True): run("chkconfig haproxy on") enable_haproxy() run("service haproxy restart")
def issu_contrail_generate_moreconf(final_conf): sudo('touch %s' %(final_conf)) cmd = 'openstack-config --set %s DEFAULTS' %(final_conf) new_api_info = ','.join(["'%s':['root', '%s']" %(hstr_to_ip(get_control_host_string(config_host)), env.passwords[config_host]) for config_host in env.roledefs['cfgm']]) new_api_info = '"{'+new_api_info+'}"' sudo('%s new_api_info %s' %(cmd, new_api_info)) db_host_info = ','.join(["'%s':'%s'" %(hstr_to_ip(get_control_host_string(host)), get_real_hostname(host)) for host in env.roledefs['database']]) db_host_info = '"{'+db_host_info+'}"' sudo('%s db_host_info %s' %(cmd, db_host_info)) config_host_info = ','.join(["'%s':'%s'" %(hstr_to_ip(get_control_host_string(host)), get_real_hostname(host)) for host in env.roledefs['cfgm']]) config_host_info = '"{'+config_host_info+'}"' sudo('%s config_host_info %s' %(cmd, config_host_info)) analytics_host_info = ','.join(["'%s':'%s'" %(hstr_to_ip(get_control_host_string(host)), get_real_hostname(host)) for host in env.roledefs['collector']]) analytics_host_info = '"{'+analytics_host_info+'}"' sudo('%s analytics_host_info %s' %(cmd, analytics_host_info)) control_host_info = ','.join(["'%s':'%s'" %(hstr_to_ip(get_control_host_string(host)), get_real_hostname(host)) for host in env.roledefs['control']]) control_host_info = '"{'+control_host_info+'}"' sudo('%s control_host_info %s' %(cmd, control_host_info)) admin_user, admin_password = get_authserver_credentials() sudo('%s admin_password %s' %(cmd, admin_password)) sudo('%s admin_user %s' %(cmd, admin_user)) sudo('%s admin_tenant_name %s' %(cmd, get_admin_tenant_name())) sudo('%s openstack_ip %s' %(cmd, get_authserver_ip())) sudo('%s api_server_ip %s' %(cmd, hstr_to_ip(get_control_host_string(env.roledefs['cfgm'][0]))))
def purge_node_from_rabbitmq_cluster(del_rabbitmq_node, role): if get_from_testbed_dict('openstack', 'manage_amqp', 'no') == 'no' and\ role == 'openstack': # We are not managing the RabbitMQ server. No-op. return env.roledefs['rabbit'] = env.roledefs[role] del_rabbitmq_ip = hstr_to_ip(del_rabbitmq_node) del_rabbitmq_ctrl_ip = hstr_to_ip(get_control_host_string(del_rabbitmq_node)) if ping_test(del_rabbitmq_node): with settings(host_string = del_rabbitmq_node, warn_only = True): sudo("rabbitmqctl stop_app") sudo("rabbitmqctl reset") sudo("service supervisor-support-service stop") sudo("mv /var/lib/rabbitmq/.erlang.cookie /var/lib/rabbitmq/.erlang.cookie.removed") sudo("mv /etc/rabbitmq/rabbitmq.config /etc/rabbitmq/rabbitmq.config.removed") else: # If the node is not reachable, then delete the node remotely from one # of the nodes in the cluster. with settings(host_string = env.roledefs['rabbit'][0], warn_only = True): hostname = local('getent hosts %s | awk \'{print $3\'}' % del_rabbitmq_ctrl_ip, capture = True) sudo("rabbitmqctl forget_cluster_node rabbit@%s" % hostname) # Giving some time for the other nodes to re-adjust the cluster, time.sleep(30) execute(config_rabbitmq) for host_string in env.roledefs[role]: with settings(host_string = host_string): sudo("service rabbitmq-server restart") # Give time for RabbitMQ to recluster time.sleep(30) result = execute(verify_cluster_status) if False in result.values(): print "Unable to recluster RabbitMQ cluster after removing the node %s" % del_rabbitmq_node exit(1)
def get_compute_host_intf(compute): compute_ip = hstr_to_ip(compute) with settings(host_string=compute, password=get_env_passwords(compute)): get_name = "ifconfig -a | grep -B1 %s | cut -d' ' -f1" % compute_ip host_intf = sudo(get_name).strip() if host_intf == 'br0': get_hw_addr = "ifconfig br0 | grep 'HWaddr' | awk '{print $5}'" hw_addr = sudo(get_hw_addr).strip() get_name = "ifconfig -a | grep '%s' | awk '{print $1}'" % hw_addr host_intf_list = sudo(get_name).strip().split('\n') host_intf_list = map(str.strip, host_intf_list) host_intf_list.remove('br0') host_intf = host_intf_list[0] return host_intf
def setup_apiserver_ssl_certs_node(*nodes): default_certfile = '/etc/contrail/ssl/certs/contrail.pem' default_keyfile = '/etc/contrail/ssl/private/contrail.key' default_cafile = '/etc/contrail/ssl/certs/contrail_ca.pem' contrailcertbundle = get_apiserver_cert_bundle() ssl_certs = ((get_apiserver_certfile(), default_certfile), (get_apiserver_keyfile(), default_keyfile), (get_apiserver_cafile(), default_cafile)) index = env.roledefs['cfgm'].index(env.host_string) + 1 for node in nodes: with settings(host_string=node, password=get_env_passwords(node)): for ssl_cert, default in ssl_certs: if ssl_cert == default: # Clear old certificate sudo('rm -f %s' % ssl_cert) sudo('rm -f %s' % contrailcertbundle) for ssl_cert, default in ssl_certs: if ssl_cert == default: cfgm_host = env.roledefs['cfgm'][0] if index == 1: if not exists(ssl_cert, use_sudo=True): print "Creating apiserver SSL certs in first cfgm node" cfgm_ip = get_contrail_internal_vip() or hstr_to_ip(get_control_host_string(cfgm_host)) sudo('create-api-ssl-certs.sh %s' % cfgm_ip) else: with settings(host_string=cfgm_host, password=get_env_passwords(cfgm_host)): while not exists(ssl_cert, use_sudo=True): print "Wait for SSL certs to be created in first cfgm" sleep(0.1) print "Get SSL cert(%s) from first cfgm" % ssl_cert tmp_dir= tempfile.mkdtemp() tmp_fname = os.path.join(tmp_dir, os.path.basename(ssl_cert)) get_as_sudo(ssl_cert, tmp_fname) print "Copy to this(%s) cfgm node" % env.host_string sudo('mkdir -p /etc/contrail/ssl/certs/') sudo('mkdir -p /etc/contrail/ssl/private/') put(tmp_fname, ssl_cert, use_sudo=True) os.remove(tmp_fname) elif os.path.isfile(ssl_cert): print "Certificate (%s) exists locally" % ssl_cert put(ssl_cert, default, use_sudo=True) elif exists(ssl_cert, use_sudo=True): print "Certificate (%s) exists in cfgm node" % ssl_cert else: raise RuntimeError("%s doesn't exists locally or in cfgm node" % ssl_cert) if not exists(contrailcertbundle, use_sudo=True): ((certfile, _), (keyfile, _), (cafile, _)) = ssl_certs sudo('cat %s %s > %s' % (certfile, cafile, contrailcertbundle)) sudo("chown -R contrail:contrail /etc/contrail/ssl")
def purge_node_from_rabbitmq_cluster(del_rabbitmq_node, role): if get_from_testbed_dict("openstack", "manage_amqp", "no") == "no" and role == "openstack": # We are not managing the RabbitMQ server. No-op. return env.roledefs["rabbit"] = env.roledefs[role] del_rabbitmq_ip = hstr_to_ip(del_rabbitmq_node) del_rabbitmq_ctrl_ip = hstr_to_ip(get_control_host_string(del_rabbitmq_node)) if ping_test(del_rabbitmq_node): with settings(host_string=del_rabbitmq_node, warn_only=True): sudo("rabbitmqctl stop_app") sudo("rabbitmqctl reset") sudo("service supervisor-support-service stop") sudo("mv /var/lib/rabbitmq/.erlang.cookie /var/lib/rabbitmq/.erlang.cookie.removed") sudo("mv /etc/rabbitmq/rabbitmq.config /etc/rabbitmq/rabbitmq.config.removed") else: # If the node is not reachable, then delete the node remotely from one # of the nodes in the cluster. with settings(host_string=env.roledefs["rabbit"][0], warn_only=True): hostname = local("getent hosts %s | awk '{print $3'}" % del_rabbitmq_ctrl_ip, capture=True) sudo("rabbitmqctl forget_cluster_node rabbit@%s" % hostname) # Giving some time for the other nodes to re-adjust the cluster, time.sleep(30) execute(config_rabbitmq) for host_string in env.roledefs[role]: with settings(host_string=host_string): sudo("service rabbitmq-server restart") # Give time for RabbitMQ to recluster time.sleep(30) result = execute(verify_cluster_status) if False in result.values(): print "Unable to recluster RabbitMQ cluster after removing the node %s" % del_rabbitmq_node exit(1)
def setup_cmon_schema(): """Task to configure cmon schema in the openstack nodes to monitor galera cluster""" if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping cmon schema setup." return openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host)\ for galera_host in openstack_host_list] internal_vip = get_openstack_internal_vip() mysql_token = sudo("cat /etc/contrail/mysql.token") pdist = detect_ostype() if pdist in ['ubuntu']: mysql_svc = 'mysql' elif pdist in ['centos', 'redhat']: mysql_svc = 'mysqld' # Create cmon schema sudo('mysql -u root -p%s -e "CREATE SCHEMA IF NOT EXISTS cmon"' % mysql_token) sudo('mysql -u root -p%s < /usr/local/cmon/share/cmon/cmon_db.sql' % mysql_token) sudo('mysql -u root -p%s < /usr/local/cmon/share/cmon/cmon_data.sql' % mysql_token) # insert static data sudo( 'mysql -u root -p%s -e "use cmon; insert into cluster(type) VALUES (\'galera\')"' % mysql_token) host_list = galera_ip_list + ['localhost', '127.0.0.1', internal_vip] # Create cmon user for host in host_list: mysql_cmon_user_cmd = 'mysql -u root -p%s -e "CREATE USER \'cmon\'@\'%s\' IDENTIFIED BY \'cmon\'"' % ( mysql_token, host) with settings(hide('everything'), warn_only=True): sudo(mysql_cmon_user_cmd) mysql_cmd = "mysql -uroot -p%s -e" % mysql_token # Grant privilages for cmon user. for host in host_list: sudo( '%s "GRANT ALL PRIVILEGES on *.* TO cmon@%s IDENTIFIED BY \'cmon\' WITH GRANT OPTION"' % (mysql_cmd, host)) # Restarting mysql in all openstack nodes for host_string in env.roledefs['openstack']: with settings(host_string=host_string): sudo("service %s restart" % mysql_svc)
def setup_galera_cluster(): """Task to cluster the openstack nodes with galera""" if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping galera cluster setup." return if env.roledefs['openstack'].index(env.host_string) == 0: execute('setup_passwordless_ssh', *env.roledefs['openstack']) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host)\ for galera_host in openstack_host_list] keystone_ip = get_keystone_ip() internal_vip = get_openstack_internal_vip() with cd(INSTALLER_DIR): sudo("setup-vnc-galera\ --self_ip %s --keystone_ip %s --galera_ip_list %s\ --internal_vip %s --openstack_index %d" % (self_ip, keystone_ip, ' '.join(galera_ip_list), internal_vip, (openstack_host_list.index(self_host) + 1)))
def setup_galera_cluster(): """Task to cluster the openstack nodes with galera""" if len(env.roledefs['openstack']) <= 1: print "Single Openstack cluster, skipping galera cluster setup." return if env.roledefs['openstack'].index(env.host_string) == 0: execute('setup_passwordless_ssh', *env.roledefs['openstack']) self_host = get_control_host_string(env.host_string) self_ip = hstr_to_ip(self_host) openstack_host_list = [get_control_host_string(openstack_host)\ for openstack_host in env.roledefs['openstack']] galera_ip_list = [hstr_to_ip(galera_host)\ for galera_host in openstack_host_list] keystone_ip = get_keystone_ip() internal_vip = get_openstack_internal_vip() with cd(INSTALLER_DIR): sudo("setup-vnc-galera\ --self_ip %s --keystone_ip %s --galera_ip_list %s\ --internal_vip %s --openstack_index %d" % ( self_ip, keystone_ip, ' '.join(galera_ip_list), internal_vip, (openstack_host_list.index(self_host) + 1)))
def issu_contrail_switch_collector_in_compute_node(*args): for host in args: collector_list = '' with settings(host_string=host): for i in range(0, len(env.roledefs['collector'])): collector_list += "%s:8086 " %(hstr_to_ip(get_control_host_string(env.roledefs['collector'][i]))) with settings(warn_only=True): file_list = sudo('ls /etc/contrail/contrail-tor-agent*') if file_list.succeeded: file_list = file_list.split() else: file_list = [] file_list.append('/etc/contrail/contrail-vrouter-agent.conf') for cfile in file_list: run('openstack-config --set %s DEFAULT collectors "%s"' % (cfile, collector_list)) run('openstack-config --set /etc/contrail/contrail-vrouter-nodemgr.conf COLLECTOR server_list "%s"' % (collector_list))
def rabbitmq_env(): erl_node_name = None rabbit_env_conf = "/etc/rabbitmq/rabbitmq-env.conf" with settings(host_string=env.host_string, password=get_env_passwords(env.host_string)): host_name = sudo("hostname -s") + ctrl erl_node_name = "rabbit@%s" % (host_name) rabbitmq_env_template = rabbitmq_env_conf rmq_env_conf = rabbitmq_env_template.template.safe_substitute( {"__erl_node_ip__": hstr_to_ip(get_control_host_string(env.host_string)), "__erl_node_name__": erl_node_name} ) tmp_fname = "/tmp/rabbitmq-env-%s.conf" % env.host_string cfg_file = open(tmp_fname, "w") cfg_file.write(rmq_env_conf) cfg_file.close() put(tmp_fname, rabbit_env_conf, use_sudo=True) local("rm %s" % (tmp_fname))
def setup_apiserver_ssl_certs_node(*nodes): default_certfile = '/etc/contrail/ssl/certs/contrail.pem' default_keyfile = '/etc/contrail/ssl/private/contrail.key' default_cafile = '/etc/contrail/ssl/certs/contrail_ca.pem' contrailcertbundle = get_apiserver_cert_bundle() ssl_certs = ((get_apiserver_certfile(), default_certfile), (get_apiserver_keyfile(), default_keyfile), (get_apiserver_cafile(), default_cafile)) index = env.roledefs['cfgm'].index(env.host_string) + 1 for node in nodes: with settings(host_string=node, password=get_env_passwords(node)): for ssl_cert, default in ssl_certs: if ssl_cert == default: # Clear old certificate sudo('rm -f %s' % ssl_cert) sudo('rm -f %s' % contrailcertbundle) for ssl_cert, default in ssl_certs: if ssl_cert == default: cfgm_host = env.roledefs['cfgm'][0] if index == 1: if not exists(ssl_cert, use_sudo=True): print "Creating apiserver SSL certs in first cfgm node" cfgm_ip = get_contrail_internal_vip() or hstr_to_ip(cfgm_host) sudo('create-api-ssl-certs.sh %s' % cfgm_ip) else: with settings(host_string=cfgm_host, password=get_env_passwords(cfgm_host)): while not exists(ssl_cert, use_sudo=True): print "Wait for SSL certs to be created in first cfgm" sleep(0.1) print "Get SSL cert(%s) from first cfgm" % ssl_cert tmp_fname = os.path.join('/tmp', os.path.basename(ssl_cert)) get_as_sudo(ssl_cert, tmp_fname) print "Copy to this(%s) cfgm node" % env.host_string put(tmp_fname, ssl_cert, use_sudo=True) os.remove(tmp_fname) elif os.path.isfile(ssl_cert): print "Certificate (%s) exists locally" % ssl_cert put(ssl_cert, default, use_sudo=True) elif exists(ssl_cert, use_sudo=True): print "Certificate (%s) exists in cfgm node" % ssl_cert else: raise RuntimeError("%s doesn't exists locally or in cfgm node" % ssl_cert) if not exists(contrailcertbundle, use_sudo=True): ((certfile, _), (keyfile, _), (cafile, _)) = ssl_certs sudo('cat %s %s > %s' % (certfile, cafile, contrailcertbundle)) sudo("chown -R contrail:contrail /etc/contrail/ssl")
def rabbitmq_env(): erl_node_name = None rabbit_env_conf = '/etc/rabbitmq/rabbitmq-env.conf' with settings(host_string=env.host_string, password=env.passwords[env.host_string]): host_name = run('hostname -s') + ctrl erl_node_name = "rabbit@%s" % (host_name) rabbitmq_env_template = rabbitmq_env_conf rmq_env_conf = rabbitmq_env_template.template.safe_substitute({ '__erl_node_ip__' : hstr_to_ip(get_control_host_string(env.host_string)), '__erl_node_name__' : erl_node_name, }) tmp_fname = "/tmp/rabbitmq-env-%s.conf" % env.host_string cfg_file = open(tmp_fname, 'w') cfg_file.write(rmq_env_conf) cfg_file.close() put(tmp_fname, rabbit_env_conf) local("rm %s" %(tmp_fname))
def rabbitmq_env(): erl_node_name = None rabbit_env_conf = '/etc/rabbitmq/rabbitmq-env.conf' with settings(host_string=env.host_string, password=get_env_passwords(env.host_string)): host_name = sudo('hostname -s') + ctrl erl_node_name = "rabbit@%s" % (host_name) rabbitmq_env_template = rabbitmq_env_conf rmq_env_conf = rabbitmq_env_template.template.safe_substitute({ '__erl_node_ip__' : hstr_to_ip(get_control_host_string(env.host_string)), '__erl_node_name__' : erl_node_name, }) tmp_fname = "/tmp/rabbitmq-env-%s.conf" % env.host_string cfg_file = open(tmp_fname, 'w') cfg_file.write(rmq_env_conf) cfg_file.close() put(tmp_fname, rabbit_env_conf, use_sudo=True) local("rm %s" %(tmp_fname))
def config_rabbitmq(): rabbit_hosts = [] rabbit_conf = '/etc/rabbitmq/rabbitmq.config' for host_string in env.roledefs['cfgm']: with settings(host_string=host_string, password=env.passwords[host_string]): host_name = run('hostname') rabbit_hosts.append("\'rabbit@%s\'" % host_name) rabbit_hosts = ', '.join(rabbit_hosts) rabbitmq_configs = rabbitmq_config.template.safe_substitute({ '__control_intf_ip__' : hstr_to_ip(get_control_host_string(env.host_string)), '__rabbit_hosts__' : rabbit_hosts, }) tmp_fname = "/tmp/rabbitmq_%s.config" % env.host_string cfg_file = open(tmp_fname, 'a') cfg_file.write(rabbitmq_configs) cfg_file.close() put(tmp_fname, "/etc/rabbitmq/rabbitmq.config") local("rm %s" %(tmp_fname))
def setup_keystone_ssl_certs_node(*nodes):
    default_certfile = '/etc/keystone/ssl/certs/keystone.pem'
    default_keyfile = '/etc/keystone/ssl/private/keystone.key'
    default_cafile = '/etc/keystone/ssl/certs/keystone_ca.pem'
    ssl_certs = ((get_keystone_certfile(), default_certfile),
                 (get_keystone_keyfile(), default_keyfile),
                 (get_keystone_cafile(), default_cafile))
    index = env.roledefs['openstack'].index(env.host_string) + 1
    for node in nodes:
        with settings(host_string=node, password=get_env_passwords(node)):
            for ssl_cert, default in ssl_certs:
                if ssl_cert == default:
                    # Clear old certificate
                    sudo('rm -f %s' % ssl_cert)
            for ssl_cert, default in ssl_certs:
                if ssl_cert == default:
                    openstack_host = env.roledefs['openstack'][0]
                    if index == 1:
                        if not exists(ssl_cert, use_sudo=True):
                            print "Creating keystone SSL certs in first openstack node"
                            sudo('create-keystone-ssl-certs.sh %s' % (
                                get_openstack_internal_vip() or hstr_to_ip(openstack_host)))
                    else:
                        with settings(host_string=openstack_host,
                                      password=get_env_passwords(openstack_host)):
                            while not exists(ssl_cert, use_sudo=True):
                                print "Wait for SSL certs to be created in first openstack"
                                sleep(0.1)
                            print "Get SSL cert(%s) from first openstack" % ssl_cert
                            tmp_fname = os.path.join('/tmp', os.path.basename(ssl_cert))
                            get_as_sudo(ssl_cert, tmp_fname)
                        print "Copy to this(%s) openstack node" % env.host_string
                        put(tmp_fname, ssl_cert, use_sudo=True)
                        os.remove(tmp_fname)
                elif os.path.isfile(ssl_cert):
                    print "Certificate (%s) exists locally" % ssl_cert
                    put(ssl_cert, default, use_sudo=True)
                elif exists(ssl_cert, use_sudo=True):
                    print "Certificate (%s) exists in openstack node" % ssl_cert
                else:
                    raise RuntimeError("%s doesn't exist locally or in openstack node" % ssl_cert)
            sudo("chown -R keystone:keystone /etc/keystone/ssl")
def fix_memcache_conf_node(*args): """Increases the memcached memory to 2048 and listen address to mgmt ip. USAGE:fab fix_memcache_conf_node:[email protected],[email protected]""" memory = '2048' for host_string in args: listen_ip = hstr_to_ip(env.host_string) with settings(host_string=host_string, warn_only=True): if detect_ostype() == 'Ubuntu': memcache_conf='/etc/memcached.conf' if run('grep "\-m " %s' % memcache_conf).failed: #Write option to memcached config file run('echo "-m %s" >> %s' % (memory, memcache_conf)) else: run("sed -i -e 's/\-m.*/\-m %s/' %s" % (memory, memcache_conf)) if run('grep "\-l " %s' % memcache_conf).failed: #Write option to memcached config file run('echo "-l %s" >> %s' % (listen_ip, memcache_conf)) else: run("sed -i -e 's/\-l.*/\-l %s/' %s" % (listen_ip, memcache_conf)) else: memcache_conf='/etc/sysconfig/memcached'
def get_real_hostname(host_string): with settings(host_string = host_string): tgt_ip = hstr_to_ip(get_control_host_string(env.host_string)) tgt_hostname = sudo("hostname") return tgt_hostname
def fixup_restart_haproxy_in_openstack_node(*args): keystone_server_lines = '' keystone_admin_server_lines = '' glance_server_lines = '' cinder_server_lines = '' ceph_restapi_server_lines = '' nova_api_server_lines = '' nova_meta_server_lines = '' nova_vnc_server_lines = '' memcached_server_lines = '' rabbitmq_server_lines = '' mysql_server_lines = '' space = ' ' * 3 for host_string in env.roledefs['openstack']: server_index = env.roledefs['openstack'].index(host_string) + 1 mgmt_host_ip = hstr_to_ip(host_string) host_ip = hstr_to_ip(get_control_host_string(host_string)) keystone_server_lines +=\ '%s server %s %s:6000 check inter 2000 rise 2 fall 1\n'\ % (space, host_ip, host_ip) keystone_admin_server_lines +=\ '%s server %s %s:35358 check inter 2000 rise 2 fall 1\n'\ % (space, host_ip, host_ip) glance_server_lines +=\ '%s server %s %s:9393 check inter 2000 rise 2 fall 1\n'\ % (space, host_ip, host_ip) cinder_server_lines +=\ '%s server %s %s:9776 check inter 2000 rise 2 fall 3\n'\ % (space, host_ip, host_ip) ceph_restapi_server_lines +=\ '%s server %s %s:5006 check inter 2000 rise 2 fall 3\n'\ % (space, host_ip, host_ip) nova_api_server_lines +=\ '%s server %s %s:9774 check inter 2000 rise 2 fall 1\n'\ % (space, host_ip, host_ip) nova_meta_server_lines +=\ '%s server %s %s:9775 check inter 2000 rise 2 fall 1\n'\ % (space, host_ip, host_ip) nova_vnc_server_lines +=\ '%s server %s %s:6999 check inter 2000 rise 2 fall 3\n'\ % (space, mgmt_host_ip, mgmt_host_ip) if server_index <= 2: memcached_server_lines +=\ '%s server repcache%s %s:11211 check inter 2000 rise 2 fall 3\n'\ % (space, server_index, host_ip) if server_index == 1: rabbitmq_server_lines +=\ '%s server rabbit%s %s:5672 weight 200 check inter 2000 rise 2 fall 3\n'\ % (space, server_index, host_ip) else: rabbitmq_server_lines +=\ '%s server rabbit%s %s:5672 weight 100 check inter 2000 rise 2 fall 3 backup\n'\ % (space, server_index, host_ip) if server_index == 1: mysql_server_lines +=\ '%s server mysql%s %s:3306 weight 200 check inter 2000 rise 2 fall 3\n'\ % (space, server_index, host_ip) else: mysql_server_lines +=\ '%s server mysql%s %s:3306 weight 100 check inter 2000 rise 2 fall 3 backup\n'\ % (space, server_index, host_ip) for host_string in env.roledefs['openstack']: haproxy_config = openstack_haproxy.template.safe_substitute({ '__keystone_backend_servers__': keystone_server_lines, '__keystone_admin_backend_servers__': keystone_admin_server_lines, '__glance_backend_servers__': glance_server_lines, '__cinder_backend_servers__': cinder_server_lines, '__ceph_restapi_backend_servers__': ceph_restapi_server_lines, '__nova_api_backend_servers__': nova_api_server_lines, '__nova_meta_backend_servers__': nova_meta_server_lines, '__nova_vnc_backend_servers__': nova_vnc_server_lines, '__memcached_servers__': memcached_server_lines, '__rabbitmq_servers__': rabbitmq_server_lines, '__mysql_servers__': mysql_server_lines, '__contrail_hap_user__': 'haproxy', '__contrail_hap_passwd__': 'contrail123', }) for host_string in args: with settings(host_string=host_string): # chop old settings including pesky default from pkg... 
            tmp_fname = "/tmp/haproxy-%s-config" % (host_string)
            get_as_sudo("/etc/haproxy/haproxy.cfg", tmp_fname)
            with settings(warn_only=True):
                local("sed -i -e '/^#contrail-openstack-marker-start/,/^#contrail-openstack-marker-end/d' %s" % (tmp_fname))
                local("sed -i -e 's/frontend\s*main\s*\*:5000/frontend main *:5001/' %s" % (tmp_fname))
                local("sed -i -e 's/*:5000/*:5001/' %s" % (tmp_fname))
                local("sed -i -e 's/ssl-relay 0.0.0.0:8443/ssl-relay 0.0.0.0:5002/' %s" % (tmp_fname))
                local("sed -i -e 's/option\shttplog/option tcplog/' %s" % (tmp_fname))
                local("sed -i -e 's/maxconn 4096/maxconn 100000/' %s" % (tmp_fname))
                local('sed -i "/^global/a\\ tune.bufsize 16384" %s' % tmp_fname)
                local('sed -i "/^global/a\\ tune.maxrewrite 1024" %s' % tmp_fname)
                local('sed -i "/^global/a\ spread-checks 4" %s' % tmp_fname)
                # Remove default HA config
                local("sed -i '/listen\sappli1-rewrite/,/rspidel/d' %s" % tmp_fname)
                local("sed -i '/listen\sappli3-relais/,/rspidel/d' %s" % tmp_fname)
            # ...generate new ones
            cfg_file = open(tmp_fname, 'a')
            cfg_file.write(haproxy_config)
            cfg_file.close()
            put(tmp_fname, "/etc/haproxy/haproxy.cfg", use_sudo=True)
            local("rm %s" % (tmp_fname))

        # haproxy enable
        with settings(host_string=host_string, warn_only=True):
            sudo("chkconfig haproxy on")
            sudo("service supervisor-openstack stop")
            enable_haproxy()
            sudo("service haproxy restart")
            # Change the keystone admin/public port
            sudo("openstack-config --set /etc/keystone/keystone.conf DEFAULT public_port 6000")
            sudo("openstack-config --set /etc/keystone/keystone.conf DEFAULT admin_port 35358")
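
# Illustration only (not part of the task): each OpenStack node contributes one
# backend entry per service to the generated haproxy section. For a hypothetical
# control IP of 192.168.10.2, the keystone backend line produced by the format
# string above would read:
#    server 192.168.10.2 192.168.10.2:6000 check inter 2000 rise 2 fall 1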

def zookeeper_rolling_restart():
    zoo_cfg = "/etc/zookeeper/conf/zoo.cfg"
    cfgm_nodes = copy.deepcopy(env.roledefs['cfgm'])
    database_nodes = copy.deepcopy(env.roledefs['database'])
    zookeeper_status = verfiy_zookeeper(*database_nodes)
    if (len(database_nodes) % 2) != 1:
        print "It is recommended to run an odd number of zookeeper (database) nodes."
        print "Add a new node to the existing cluster's testbed.py and install contrail-install-packages in it.\n\
Installing/Provisioning will be done as part of Upgrade"
        exit(0)
    if cfgm_nodes == database_nodes:
        print "No need for rolling restart."

    if (len(database_nodes) > 1 and
            'leader' in zookeeper_status.values() and
            'follower' in zookeeper_status.values() and
            'notrunning' not in zookeeper_status.values() and
            'notinstalled' not in zookeeper_status.values() and
            'standalone' not in zookeeper_status.values()):
        print zookeeper_status
        print "Zookeeper quorum is already formed properly."
        return
    elif (len(database_nodes) == 1 and
            'notinstalled' not in zookeeper_status.values() and
            'standalone' in zookeeper_status.values()):
        print zookeeper_status
        print "Zookeeper quorum is already formed properly."
        return

    execute('stop_cfgm')
    execute('backup_zookeeper_database')

    old_nodes = list(set(cfgm_nodes).difference(set(database_nodes)))
    new_nodes = list(set(database_nodes).difference(set(cfgm_nodes)))

    for new_node in new_nodes:
        zk_index = (database_nodes.index(new_node) + len(cfgm_nodes) + 1)
        with settings(host_string=new_node, password=env.passwords[new_node]):
            pdist = detect_ostype()
            print "Install zookeeper in the new node."
            execute('create_install_repo_node', new_node)
            remove_package(['supervisor'], pdist)
            upgrade_package(['python-contrail', 'contrail-openstack-database', 'zookeeper'], pdist)
            if pdist in ['ubuntu']:
                sudo("ln -sf /bin/true /sbin/chkconfig")
            sudo("chkconfig zookeeper on")
            print "Fix zookeeper configs"
            sudo("sudo sed 's/^#log4j.appender.ROLLINGFILE.MaxBackupIndex=/log4j.appender.ROLLINGFILE.MaxBackupIndex=/g' /etc/zookeeper/conf/log4j.properties > log4j.properties.new")
            sudo("sudo mv log4j.properties.new /etc/zookeeper/conf/log4j.properties")
            if pdist in ['centos']:
                sudo('echo export ZOO_LOG4J_PROP="INFO,CONSOLE,ROLLINGFILE" >> /usr/lib/zookeeper/bin/zkEnv.sh')
            if pdist in ['ubuntu']:
                sudo('echo ZOO_LOG4J_PROP="INFO,CONSOLE,ROLLINGFILE" >> /etc/zookeeper/conf/environment')
            print "put cluster-unique zookeeper's instance id in myid"
            sudo('sudo echo "%s" > /var/lib/zookeeper/myid' % (zk_index))

    print "Add new nodes to existing zookeeper quorum"
    with settings(host_string=cfgm_nodes[0], password=env.passwords[cfgm_nodes[0]]):
        for new_node in new_nodes:
            zk_index = (database_nodes.index(new_node) + len(cfgm_nodes) + 1)
            sudo('echo "server.%d=%s:2888:3888" >> %s' % (zk_index, hstr_to_ip(new_node), zoo_cfg))
        tmp_dir = tempfile.mkdtemp()
        get_as_sudo(zoo_cfg, tmp_dir)

    print "Restart zookeeper in all nodes to make new nodes join zookeeper quorum"
    for zookeeper_node in cfgm_nodes + new_nodes:
        with settings(host_string=zookeeper_node, password=env.passwords[zookeeper_node]):
            put(tmp_dir + '/zoo.cfg', zoo_cfg, use_sudo=True)
            print "Start Zookeeper in new database node"
            execute('restart_zookeeper')

    print "Waiting 5 seconds for the new nodes in the zookeeper quorum to be synced."
    sleep(5)

    print "Shutdown old nodes one by one and make sure leader/follower election is complete after each shutdown"
    zoo_nodes = cfgm_nodes + database_nodes
    for old_node in old_nodes:
        zoo_nodes.remove(old_node)
        with settings(host_string=old_node, password=env.passwords[old_node]):
            print "Stop Zookeeper in old cfgm node"
            execute('stop_zookeeper')
        for zoo_node in zoo_nodes:
            with settings(host_string=zoo_node, password=env.passwords[zoo_node]):
                # Drop the stopped node's entry from the remaining members' zoo.cfg
                sudo("sed -i '/^server.*%s:2888:3888/d' %s" % (hstr_to_ip(old_node), zoo_cfg))
        retries = 3
        while retries:
            zookeeper_status = verfiy_zookeeper(*zoo_nodes)
            if (len(zoo_nodes) > 1 and
                    'leader' in zookeeper_status.values() and
                    'follower' in zookeeper_status.values() and
                    'notrunning' not in zookeeper_status.values() and
                    'notinstalled' not in zookeeper_status.values() and
                    'standalone' not in zookeeper_status.values()):
                print zookeeper_status
                print "Zookeeper quorum is formed properly."
                break
            elif (len(zoo_nodes) == 1 and
                    'notinstalled' not in zookeeper_status.values() and
                    'standalone' in zookeeper_status.values()):
                print zookeeper_status
                print "Zookeeper quorum is formed properly."
                break
            else:
                retries -= 1
                if retries:
                    for zoo_node in zoo_nodes:
                        with settings(host_string=zoo_node, password=env.passwords[zoo_node]):
                            execute('restart_zookeeper')
                    continue
                print "Zookeeper quorum is not formed. Fix it and retry upgrade"
                print zookeeper_status
                exit(1)

    print "Correct the server id in zoo.cfg for the new nodes in the zookeeper quorum"
    with settings(host_string=database_nodes[0], password=env.passwords[database_nodes[0]]):
        sudo("sed -i '/^server.*3888/d' %s" % zoo_cfg)
        for zookeeper_node in database_nodes:
            zk_index = (database_nodes.index(zookeeper_node) + 1)
            sudo('echo "server.%d=%s:2888:3888" >> %s' % (zk_index, hstr_to_ip(zookeeper_node), zoo_cfg))
        tmp_dir = tempfile.mkdtemp()
        get_as_sudo(zoo_cfg, tmp_dir)

    print "Correct the myid in the myid file for the new nodes in the zookeeper quorum"
    for zookeeper_node in database_nodes:
        zk_index = (database_nodes.index(zookeeper_node) + 1)
        with settings(host_string=zookeeper_node, password=env.passwords[zookeeper_node]):
            print "put cluster-unique zookeeper's instance id in myid"
            sudo('sudo echo "%s" > /var/lib/zookeeper/myid' % (zk_index))
            execute('stop_zookeeper')

    print "Restart all the zookeeper nodes in the new quorum"
    for zookeeper_node in database_nodes:
        with settings(host_string=zookeeper_node, password=env.passwords[zookeeper_node]):
            put(tmp_dir + '/zoo.cfg', zoo_cfg, use_sudo=True)
            execute('restart_zookeeper')

    print "Make sure leader/follower election is complete"
    with settings(host_string=zookeeper_node, password=env.passwords[zookeeper_node]):
        retries = 3
        while retries:
            zookeeper_status = verfiy_zookeeper(*database_nodes)
            if (len(database_nodes) > 1 and
                    'leader' in zookeeper_status.values() and
                    'follower' in zookeeper_status.values() and
                    'notrunning' not in zookeeper_status.values() and
                    'notinstalled' not in zookeeper_status.values() and
                    'standalone' not in zookeeper_status.values()):
                print zookeeper_status
                break
            elif (len(database_nodes) == 1 and
                    'notinstalled' not in zookeeper_status.values() and
                    'standalone' in zookeeper_status.values()):
                print zookeeper_status
                print "Zookeeper quorum is already formed properly."
                break
            else:
                retries -= 1
                if retries:
                    continue
                print "Zookeeper leader/follower election has problems. Fix it and retry upgrade"
                print zookeeper_status
                exit(1)
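
# Illustration only (not part of the task): for a hypothetical three-node
# database role, the regenerated zoo.cfg ends up with one entry per member and
# each node's /var/lib/zookeeper/myid holds its own index, e.g.:
#   server.1=192.168.10.11:2888:3888
#   server.2=192.168.10.12:2888:3888
#   server.3=192.168.10.13:2888:3888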