Example #1
0
    def reset_distribution(self):
        """ Reset the distribution of masters and slaves with original setting
        """
        center = Center()
        center.update_ip_port()
        logger.debug('reset_distribution')
        cluster_id = config.get_cur_cluster_id()
        lib_path = config.get_ld_library_path(cluster_id)
        path_of_fb = config.get_path_of_fb(cluster_id)
        sr2_redis_bin = path_of_fb['sr2_redis_bin']
        env_cmd = [
            'GLOBIGNORE=*;',
            'export LD_LIBRARY_PATH={};'.format(lib_path['ld_library_path']),
            'export DYLD_LIBRARY_PATH={};'.format(
                lib_path['dyld_library_path']),
        ]
        redis_cli_cmd = os.path.join(sr2_redis_bin, 'redis-cli')
        slave_nodes = center.get_slave_nodes()
        master_ports = center.master_port_list

        for slave_node in slave_nodes:
            (host, port) = slave_node.split(':')
            try:
                value = int(port)
                if value in master_ports:
                    # failover takeover
                    msg = message.get('try_failover_takeover').format(
                        slave=slave_node)
                    self._print(msg)
                    sub_cmd = 'cluster failover takeover'
                    command = '{} {} -h {} -p {} {}'.format(
                        ' '.join(env_cmd),
                        redis_cli_cmd,
                        host,
                        port,
                        sub_cmd,
                    )
                    stdout = subprocess.check_output(command, shell=True)
                    outs = ''
                    outs = '\n'.join([outs, stdout])
                    self._print(outs)
            except ValueError:
                pass
Example #2
0
def _deploy_zero_downtime(cluster_id):
    logger.debug("zero downtime update cluster {}".format(cluster_id))
    center = Center()
    center.update_ip_port()
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']

    # check master alive
    m_count = len(m_hosts) * len(m_ports)
    alive_m_count = center.get_alive_master_redis_count()
    if alive_m_count < m_count:
        logger.error(message.get('error_exist_disconnected_master'))
        return

    if not config.is_slave_enabled:
        logger.error(message.get('error_need_to_slave'))
        return

    # select installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)

    # backup info
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    local_ip = config.get_local_ip()

    # backup conf
    center.conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    for host in s_hosts:
        client = net.get_ssh(host)
        center.cluster_backup(host, cluster_id, cluster_backup_dir)
        client.close()

    # transfer & install
    logger.info(message.get('transfer_and_execute_installer'))
    for host in m_hosts:
        logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
        net.ssh_execute(client=client, command=cmd)
        client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # restore conf
    center.conf_restore(local_ip, cluster_id, conf_backup_dir)

    # set deploy state complete
    for node in m_hosts:
        path_of_fb = config.get_path_of_fb(cluster_id)
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(node)
        cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
        net.ssh_execute(client=client, command=cmd)
        client.close()

    # restart slave
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis()
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)

    center.wait_until_all_redis_process_up()
    slaves_for_failover = center.get_slave_nodes()

    key = 'cluster-node-timeout'
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    logger.debug('config set: cluster-node-timeout 2000')
    RedisCliConfig().set(key, '2000', all=True)

    # cluster failover (with no option)
    logger.info(message.get('failover_on_deploy'))
    logger.debug(slaves_for_failover)
    try_count = 0
    while try_count < 10:
        try_count += 1
        success = True
        for slave_addr in slaves_for_failover:
            host, port = slave_addr.split(':')
            stdout = center.run_failover("{}:{}".format(host, port))
            logger.debug("failover {}:{} {}".format(host, port, stdout))
            if stdout != "ERR You should send CLUSTER FAILOVER to a slave":
                # In some cases, the cluster failover is not complete
                # even if stdout is OK
                # If redis changed to master completely,
                # return 'ERR You should send CLUSTER FAILOVER to a slave'
                success = False
        if success:
            break
        msg = message.get('retry').format(try_count=try_count)
        logger.info(msg)
        time.sleep(5)
    logger.debug('restore config: cluster-node-timeout')
    center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
    center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
    if not success:
        logger.error(message.get('error_redis_failover'))
        return

    # restart master (current slave)
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis(slave=False)
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)
    center.wait_until_all_redis_process_up()
Example #3
0
    def failover_list(self):
        """ Find failovered|no-slave|no-slot masters and failbacked slaves
        """
        center = Center()
        center.update_ip_port()
        logger.debug('failover_list')
        master_nodes = center.get_master_obj_list()
        slave_nodes = center.get_slave_nodes()
        master_ports = center.master_port_list
        slave_ports = center.slave_port_list
        output_msg = []

        failovered_masters = []
        for master_node in master_nodes:
            addr = master_node['addr']
            port = addr.split(':')[1]
            try:
                value = int(port)
                if value in slave_ports:
                    failovered_masters.append(addr)
            except ValueError:
                pass

        noslave_masters = []
        for master_node in master_nodes:
            if len(master_node['slaves']) == 0:
                noslave_masters.append(master_node['addr'])
            else:
                for slave_node in master_node['slaves']:
                    if slave_node['status'] == 'disconnected':
                        noslave_masters.append(master_node['addr'])
                        break

        noslot_masters = []
        ret = RedisCliUtil.command_all_async('cluster nodes', slave=True)
        outs = ''
        for _, host, port, res, stdout in ret:
            if res == 'OK':
                outs = '\n'.join([outs, stdout])
                lines = outs.splitlines()
                filtered_nodes = (filter(lambda x: 'myself,master' in x,
                                         lines))
            else:
                logger.warning("FAIL {}:{} {}".format(host, port, stdout))
        for line in filtered_nodes:
            words = line.split()
            if len(words) == 8:
                noslot_masters.append(line.split()[1])

        failbacked_slaves = []
        for slave_nodes in slave_nodes:
            port = slave_nodes.split(':')[1]
            try:
                value = int(port)
                if value in master_ports:
                    failbacked_slaves.append(slave_nodes)
            except ValueError:
                pass

        output_msg.append('1) failovered masters:')
        output_msg.extend(failovered_masters)
        output_msg.append('')
        output_msg.append('2) no-slave masters:')
        output_msg.extend(noslave_masters)
        output_msg.append('')
        output_msg.append('3) no-slot masters:')
        output_msg.extend(noslot_masters)
        output_msg.append('')
        output_msg.append('4) failbacked slaves:')
        output_msg.extend(failbacked_slaves)
        output_msg.append('')

        logger.info(color.ENDC + '\n'.join(output_msg))