Example #1
0
    def add_slave(self, yes=False):
        """Add slave of cluster

        Add slaves to cluster that configured master only.
        :param yes: Skip confirm information
        """
        logger.debug('add_slave')
        if not isinstance(yes, bool):
            msg = message.get('error_option_type_not_boolean')
            msg = msg.format(option='yes')
            logger.error(msg)
            return
        center = Center()
        center.update_ip_port()
        # check
        s_hosts = center.slave_host_list
        s_ports = center.slave_port_list
        if not s_hosts:
            msg = message.get('error_slave_host_empty')
            raise ClusterRedisError(msg)
        if not s_ports:
            msg = message.get('error_slave_port_empty')
            raise ClusterRedisError(msg)
        success = center.check_hosts_connection(hosts=s_hosts)
        if not success:
            return
        center.ensure_cluster_exist()
        slave_alive_count = center.get_alive_slave_redis_count()
        slave_alive_count_mine = center.get_alive_slave_redis_count(
            check_owner=True
        )
        not_mine_count = slave_alive_count - slave_alive_count_mine
        if not_mine_count > 0:
            msg = message.get('error_cluster_start_slave_collision')
            msg = '\n'.join(msg).format(count=not_mine_count)
            raise LightningDBError(12, msg)

        # confirm info
        result = center.confirm_node_port_info(skip=yes)
        if not result:
            msg = message.get('cancel')
            logger.warning(msg)
            return
        # clean
        center.cluster_clean(master=False)
        # backup logs
        center.backup_server_logs(master=False)
        center.create_redis_data_directory(master=False)
        # configure
        center.configure_redis(master=False)
        center.sync_conf()
        # start
        center.start_redis_process(master=False)
        center.wait_until_all_redis_process_up()

        # change redis config temporarily
        key = 'cluster-node-timeout'
        origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
        if not origin_s_value:
            msg = "RedisConfigKeyError: '{}'".format(key)
            logger.warning(msg)
        if origin_s_value:
            # cli config set cluster-node-timeout 2000
            logger.debug('set cluster node time out 2000 for create')
            center.cli_config_set_all(key, '2000', s_hosts, s_ports)
        # create
        center.replicate()
        if origin_s_value:
            # cli config restore cluster-node-timeout
            logger.debug('restore cluster node time out')
            center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
Example #2
0
def _deploy_zero_downtime(cluster_id):
    logger.debug("zero downtime update cluster {}".format(cluster_id))
    center = Center()
    center.update_ip_port()
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']

    # check master alive
    m_count = len(m_hosts) * len(m_ports)
    alive_m_count = center.get_alive_master_redis_count()
    if alive_m_count < m_count:
        logger.error(message.get('error_exist_disconnected_master'))
        return

    if not config.is_slave_enabled:
        logger.error(message.get('error_need_to_slave'))
        return

    # select installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)

    # backup info
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    local_ip = config.get_local_ip()

    # backup conf
    center.conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    for host in s_hosts:
        client = net.get_ssh(host)
        center.cluster_backup(host, cluster_id, cluster_backup_dir)
        client.close()

    # transfer & install
    logger.info(message.get('transfer_and_execute_installer'))
    for host in m_hosts:
        logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
        net.ssh_execute(client=client, command=cmd)
        client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # restore conf
    center.conf_restore(local_ip, cluster_id, conf_backup_dir)

    # set deploy state complete
    for node in m_hosts:
        path_of_fb = config.get_path_of_fb(cluster_id)
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(node)
        cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
        net.ssh_execute(client=client, command=cmd)
        client.close()

    # restart slave
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis()
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)

    center.wait_until_all_redis_process_up()
    slaves_for_failover = center.get_slave_nodes()

    key = 'cluster-node-timeout'
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    logger.debug('config set: cluster-node-timeout 2000')
    RedisCliConfig().set(key, '2000', all=True)

    # cluster failover (with no option)
    logger.info(message.get('failover_on_deploy'))
    logger.debug(slaves_for_failover)
    try_count = 0
    while try_count < 10:
        try_count += 1
        success = True
        for slave_addr in slaves_for_failover:
            host, port = slave_addr.split(':')
            stdout = center.run_failover("{}:{}".format(host, port))
            logger.debug("failover {}:{} {}".format(host, port, stdout))
            if stdout != "ERR You should send CLUSTER FAILOVER to a slave":
                # In some cases, the cluster failover is not complete
                # even if stdout is OK
                # If redis changed to master completely,
                # return 'ERR You should send CLUSTER FAILOVER to a slave'
                success = False
        if success:
            break
        msg = message.get('retry').format(try_count=try_count)
        logger.info(msg)
        time.sleep(5)
    logger.debug('restore config: cluster-node-timeout')
    center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
    center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
    if not success:
        logger.error(message.get('error_redis_failover'))
        return

    # restart master (current slave)
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis(slave=False)
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)
    center.wait_until_all_redis_process_up()
Example #3
0
    def create(self, yes=False):
        """Create cluster

        Before create cluster, all redis should be running.
        :param yes: skip confirm information
        """
        center = Center()
        center.update_ip_port()
        success = center.check_hosts_connection()
        if not success:
            return

        m_count = len(center.master_host_list) * len(center.master_port_list)
        if m_count < 3:
            msg = message.get('error_master_redis_less_than_3')
            raise ClusterRedisError(msg)

        # if need to cluster start
        alive_count = center.get_alive_all_redis_count()
        my_alive_count = center.get_alive_all_redis_count(check_owner=True)
        if alive_count != my_alive_count:
            msg = message.get('error_cluster_start_port_collision')
            raise ClusterRedisError(msg)
        all_count = len(center.all_host_list)
        if alive_count < all_count:
            logger.debug('cluster start in create')
            # init
            center.backup_server_logs()
            center.create_redis_data_directory()

            # cluster configure
            center.configure_redis()
            center.sync_conf(show_result=True)

            # cluster start
            center.start_redis_process()
            center.wait_until_all_redis_process_up()

        key = 'cluster-node-timeout'
        m_hosts = center.master_host_list
        m_ports = center.master_port_list
        origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
        if not origin_m_value:
            msg = "RedisConfigKeyError(master): '{}'".format(key)
            logger.warning(msg)
        s_hosts = center.slave_host_list
        s_ports = center.slave_port_list
        if s_hosts and s_ports:
            origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
            if not origin_s_value:
                msg = "RedisConfigKeyError(slave): '{}'".format(key)
                logger.warning(msg)
        if origin_m_value:
            # cli config set cluster-node-timeout 2000
            logger.debug('set cluster node time out 2000 for create')
            center.cli_config_set_all(key, '2000', m_hosts, m_ports)
            if s_hosts and s_ports and origin_s_value:
                center.cli_config_set_all(key, '2000', s_hosts, s_ports)
        center.create_cluster(yes)
        if origin_m_value:
            # cli config restore cluster-node-timeout
            logger.debug('restore cluster node time out')
            center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
            if s_hosts and s_ports and origin_s_value:
                v = origin_s_value
                center.cli_config_set_all(key, v, s_hosts, s_ports)