예제 #1
0
def _handle(text):
    if text == '':
        return
    if text == 'clear':
        utils.clear_screen()
        return
    text = text.replace('-- --help', '?')
    text = text.replace('--help', '?')
    text = text.replace('?', '-- --help')
    err_flg = True
    try:
        fire.Fire(component=Command, command=text)
        err_flg = False
    except KeyboardInterrupt:
        msg = message.get('cancel_command_input')
        logger.warning('\b\b' + msg)
    except KeyError as ex:
        logger.warn('[%s] command fail' % text)
        logger.exception(ex)
    except TypeError as ex:
        logger.exception(ex)
    except IOError as ex:
        if ex.errno == 2:
            msg = message.get('error_file_not_exist').format(file=ex.filename)
            logger.error(msg)
        else:
            logger.exception(ex)
    except EOFError:
        msg = message.get('cancel_command_input')
        logger.warning('\b\b' + msg)
    except utils.CommandError as ex:
        logger.exception(ex)
    except FireError:
        pass
    except FireExit:
        pass
    except (
            HostNameError,
            HostConnectionError,
            SSHConnectionError,
            FileNotExistError,
            YamlSyntaxError,
            PropsSyntaxError,
            PropsKeyError,
            PropsError,
            SSHCommandError,
            ClusterRedisError,
            ClusterNotExistError,
            ClusterIdError,
            EnvError,
    ) as ex:
        logger.error('{}: {}'.format(ex.class_name(), str(ex)))
    except LightningDBError as ex:
        logger.error('[ErrorCode {}] {}'.format(ex.error_code, str(ex)))
    except BaseException as ex:
        logger.exception(ex)
    finally:
        return err_flg
예제 #2
0
def is_port_empty(host, port):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    result = True
    status = 'OK'
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind((host, port))
    except socket.error as e:
        result = False
        if e.errno == errno.EADDRINUSE:
            status = 'USED'
            logger.debug('{}:{} is already in use'.format(host, port))
        else:
            status = 'FAIL'
            logger.exception(e)
    except Exception as e:
        result = False
        status = 'FAIL'
        logger.exception(e)
    finally:
        return result, status
예제 #3
0
def _deploy(cluster_id, history_save, clean):
    deploy_state = DeployUtil().get_state(cluster_id)
    if deploy_state == DEPLOYED:
        msg = message.get('ask_deploy_again')
        msg = msg.format(cluster_id=cluster_id)
        msg = color.yellow(msg)
        yes = ask_util.askBool(msg, default='n')
        if not yes:
            logger.info(message.get('cancel'))
            return

    restore_yes = None
    no_localhost = False
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    tmp_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, 'tmp')
    meta = [['NAME', 'VALUE']]
    path_of_fb = config.get_path_of_fb(cluster_id)
    conf_path = path_of_fb['conf_path']
    props_path = path_of_fb['redis_properties']
    cluster_path = path_of_fb['cluster_path']
    path_of_cli = config.get_path_of_cli(cluster_id)
    conf_backup_path = path_of_cli['conf_backup_path']
    tmp_backup_path = os.path.join(conf_backup_path, tmp_backup_dir)
    local_ip = config.get_local_ip()

    # ask installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)
    meta.append(['installer', installer_name])

    # ask restore conf
    if deploy_state == DEPLOYED:
        restore_yes = ask_util.askBool(message.get('ask_restore_conf'))
        meta.append(['restore', restore_yes])

    # input props
    hosts = []
    if deploy_state == DEPLOYED:
        if restore_yes:
            meta += DeployUtil().get_meta_from_props(props_path)
            hosts = config.get_props(props_path, 'sr2_redis_master_hosts')
        else:
            if not os.path.isdir(conf_backup_path):
                os.mkdir(conf_backup_path)
            if os.path.exists(tmp_backup_path):
                msg = message.get('ask_load_history_of_previous_modification')
                yes = ask_util.askBool(msg)
                if not yes:
                    shutil.rmtree(tmp_backup_path)
            if not os.path.exists(tmp_backup_path):
                os.mkdir(tmp_backup_path)
                shutil.copy(os.path.join(conf_path, 'redis.properties'),
                            os.path.join(tmp_backup_path, 'redis.properties'))
            tmp_props_path = os.path.join(tmp_backup_path, 'redis.properties')
            editor.edit(tmp_props_path, syntax='sh')
            meta += DeployUtil().get_meta_from_props(tmp_props_path)
            hosts = config.get_props(tmp_props_path, 'sr2_redis_master_hosts')
    else:
        # new deploy
        props_dict = ask_util.props(cluster_id, save=history_save)
        hosts = props_dict['hosts']
        meta += DeployUtil().get_meta_from_dict(props_dict)
    utils.print_table(meta)

    msg = message.get('confirm_deploy_information')
    yes = ask_util.askBool(msg)
    if not yes:
        logger.info(message.get('cancel'))
        return

    # check node status
    success = Center().check_hosts_connection(hosts, True)
    if not success:
        msg = message.get('error_exist_unavailable_host')
        logger.error(msg)
        return
    logger.debug('Connection of all hosts ok.')
    success = Center().check_include_localhost(hosts)
    if not success:
        no_localhost = True

    # get port info
    if deploy_state == DEPLOYED:
        if restore_yes:
            key = 'sr2_redis_master_ports'
            m_ports = config.get_props(props_path, key, [])
            key = 'sr2_redis_slave_ports'
            s_ports = config.get_props(props_path, key, [])
            replicas = len(s_ports) // len(m_ports)
        else:
            key = 'sr2_redis_master_ports'
            m_ports = config.get_props(tmp_props_path, key, [])
            key = 'sr2_redis_slave_ports'
            s_ports = config.get_props(tmp_props_path, key, [])
            replicas = len(s_ports) // len(m_ports)
    else:
        m_ports = props_dict['master_ports']
        s_ports = props_dict['slave_ports']
        replicas = props_dict['replicas']

    while True:
        msg = message.get('check_port')
        logger.info(msg)
        host_ports_list = []
        for host in hosts:
            host_ports_list.append((host, m_ports + s_ports))
        conflict = Center().check_port_is_enable(host_ports_list)
        if not conflict:
            logger.info("OK")
            break
        utils.print_table([["HOST", "PORT"]] + conflict)
        msg = message.get('ask_port_collision')
        msg = color.yellow(msg)
        yes = ask_util.askBool(msg)
        if yes:
            logger.info("OK")
            break
        m_ports = ask_util.master_ports(False, cluster_id)
        replicas = ask_util.replicas(False)
        s_ports = ask_util.slave_ports(cluster_id, len(m_ports), replicas)
        if deploy_state == DEPLOYED:
            if restore_yes:
                key = 'sr2_redis_master_ports'
                value = cluster_util.convert_list_2_seq(m_ports)
                config.set_props(props_path, key, value)
                key = 'sr2_redis_slave_ports'
                value = cluster_util.convert_list_2_seq(s_ports)
                config.set_props(props_path, key, value)
            else:
                key = 'sr2_redis_master_ports'
                value = cluster_util.convert_list_2_seq(m_ports)
                config.set_props(tmp_props_path, key, value)
                key = 'sr2_redis_slave_ports'
                value = cluster_util.convert_list_2_seq(s_ports)
                config.set_props(tmp_props_path, key, value)
        else:
            props_dict['master_ports'] = m_ports
            props_dict['slave_ports'] = s_ports
            props_dict['replicas'] = replicas

    # if pending, delete legacy on each hosts
    if no_localhost:
        if DeployUtil().get_state(cluster_id, local_ip) == PENDING:
            client = net.get_ssh(local_ip)
            command = 'rm -rf {}'.format(cluster_path)
            net.ssh_execute(client=client, command=command)
            client.close()
    for host in hosts:
        if DeployUtil().get_state(cluster_id, host) == PENDING:
            client = net.get_ssh(host)
            command = 'rm -rf {}'.format(cluster_path)
            net.ssh_execute(client=client, command=command)
            client.close()

    # added_hosts = post_hosts - pre_hosts
    msg = message.get('check_cluster_exist')
    logger.info(msg)
    added_hosts = set(hosts)
    meta = []
    if deploy_state == DEPLOYED:
        pre_hosts = config.get_props(props_path, 'sr2_redis_master_hosts')
        added_hosts -= set(pre_hosts)
    can_deploy = True
    if no_localhost:
        added_hosts |= set([local_ip])
    for host in added_hosts:
        client = net.get_ssh(host)
        is_localhost = Center().is_localhost(host)
        if is_localhost:
            if no_localhost:
                continue
            if os.path.exists(cluster_path + '/remote'):
                meta.append([host, color.green('CLEAN')])
                continue
        if net.is_exist(client, cluster_path):
            meta.append([host, color.red('CLUSTER EXIST')])
            can_deploy = False
            continue
        meta.append([host, color.green('CLEAN')])
    if meta:
        utils.print_table([['HOST', 'STATUS']] + meta)
    if not can_deploy:
        msg = message.get('error_cluster_collision')
        logger.error(msg)
        return
        # if not force:
        #     logger.error("If you trying to force, use option '--force'")
        #     return
    logger.info('OK')

    # cluster stop and clean
    if deploy_state == DEPLOYED and clean:
        center = Center()
        cur_cluster_id = config.get_cur_cluster_id(allow_empty_id=True)
        run_cluster_use(cluster_id)
        center.update_ip_port()
        center.stop_redis()
        center.remove_all_of_redis_log_force()
        center.cluster_clean()
        run_cluster_use(cur_cluster_id)

    # backup conf
    if deploy_state == DEPLOYED:
        Center().conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    backup_hosts = []
    if deploy_state == DEPLOYED:
        backup_hosts += set(pre_hosts)
    # if force:
    #     backup_hosts += added_hosts
    for host in backup_hosts:
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(host)
        Center().cluster_backup(host, cluster_id, cluster_backup_dir)
        client.close()

    # transfer & install
    msg = message.get('transfer_and_execute_installer')
    logger.info(msg)
    target_hosts = hosts + [local_ip] if no_localhost else hosts
    for host in target_hosts:
        if not (no_localhost and Center().is_localhost(host)):
            logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
        net.ssh_execute(client=client, command=cmd)
        client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # setup props
    if deploy_state == DEPLOYED:
        if restore_yes:
            tag = conf_backup_dir
        else:
            tag = tmp_backup_dir
        Center().conf_restore(local_ip, cluster_id, tag)
    else:
        key = 'sr2_redis_master_hosts'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['hosts'])

        key = 'sr2_redis_master_ports'
        config.make_key_enable(props_path, key)
        value = cluster_util.convert_list_2_seq(props_dict['master_ports'])
        config.set_props(props_path, key, value)

        key = 'sr2_redis_slave_hosts'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['hosts'])
        config.make_key_disable(props_path, key)

        if props_dict['replicas'] > 0:
            key = 'sr2_redis_slave_hosts'
            config.make_key_enable(props_path, key)

            key = 'sr2_redis_slave_ports'
            config.make_key_enable(props_path, key)
            value = cluster_util.convert_list_2_seq(props_dict['slave_ports'])
            config.set_props(props_path, key, value)

        key = 'ssd_count'
        config.make_key_enable(props_path, key)
        config.set_props(props_path, key, props_dict['ssd_count'])

        key = 'sr2_redis_data'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

        key = 'sr2_redis_db_path'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

        key = 'sr2_flash_db_path'
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_enable(props_path, key, v1_flg=True)
        config.make_key_disable(props_path, key)
        config.set_props(props_path, key, props_dict['prefix_of_db_path'])

    # synk props
    msg = message.get('sync_conf')
    logger.info(msg)
    for node in hosts:
        if socket.gethostbyname(node) in config.get_local_ip_list():
            continue
        client = net.get_ssh(node)
        if not client:
            msg = message.get('error_ssh_connection').format(host=node)
            logger.error(msg)
            return
        net.copy_dir_to_remote(client, conf_path, conf_path)
        client.close()

    # set deploy state complete
    if os.path.exists(tmp_backup_path):
        shutil.rmtree(tmp_backup_path)
    for node in target_hosts:
        path_of_fb = config.get_path_of_fb(cluster_id)
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(node)
        cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
        net.ssh_execute(client=client, command=cmd)
        client.close()
    if no_localhost:
        os.system('touch {}/remote'.format(cluster_path))

    msg = message.get('complete_deploy').format(cluster_id=cluster_id)
    logger.info(msg)
    Cluster().use(cluster_id)
    msg = message.get('suggest_after_deploy')
    logger.info(msg)
예제 #4
0
def _deploy_zero_downtime(cluster_id):
    logger.debug("zero downtime update cluster {}".format(cluster_id))
    center = Center()
    center.update_ip_port()
    m_hosts = center.master_host_list
    m_ports = center.master_port_list
    s_hosts = center.slave_host_list
    s_ports = center.slave_port_list
    path_of_fb = config.get_path_of_fb(cluster_id)
    cluster_path = path_of_fb['cluster_path']

    # check master alive
    m_count = len(m_hosts) * len(m_ports)
    alive_m_count = center.get_alive_master_redis_count()
    if alive_m_count < m_count:
        logger.error(message.get('error_exist_disconnected_master'))
        return

    if not config.is_slave_enabled:
        logger.error(message.get('error_need_to_slave'))
        return

    # select installer
    installer_path = ask_util.installer()
    installer_name = os.path.basename(installer_path)

    # backup info
    current_time = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    conf_backup_dir = 'cluster_{}_conf_bak_{}'.format(cluster_id, current_time)
    cluster_backup_dir = 'cluster_{}_bak_{}'.format(cluster_id, current_time)
    local_ip = config.get_local_ip()

    # backup conf
    center.conf_backup(local_ip, cluster_id, conf_backup_dir)

    # backup cluster
    for host in s_hosts:
        client = net.get_ssh(host)
        center.cluster_backup(host, cluster_id, cluster_backup_dir)
        client.close()

    # transfer & install
    logger.info(message.get('transfer_and_execute_installer'))
    for host in m_hosts:
        logger.info(' - {}'.format(host))
        client = net.get_ssh(host)
        cmd = 'mkdir -p {0} && touch {0}/.deploy.state'.format(cluster_path)
        net.ssh_execute(client=client, command=cmd)
        client.close()
        DeployUtil().transfer_installer(host, cluster_id, installer_path)
        try:
            DeployUtil().install(host, cluster_id, installer_name)
        except SSHCommandError as ex:
            msg = message.get('error_execute_installer')
            msg = msg.format(installer=installer_path)
            logger.error(msg)
            logger.exception(ex)
            return

    # restore conf
    center.conf_restore(local_ip, cluster_id, conf_backup_dir)

    # set deploy state complete
    for node in m_hosts:
        path_of_fb = config.get_path_of_fb(cluster_id)
        cluster_path = path_of_fb['cluster_path']
        client = net.get_ssh(node)
        cmd = 'rm -rf {}'.format(os.path.join(cluster_path, '.deploy.state'))
        net.ssh_execute(client=client, command=cmd)
        client.close()

    # restart slave
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis()
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)

    center.wait_until_all_redis_process_up()
    slaves_for_failover = center.get_slave_nodes()

    key = 'cluster-node-timeout'
    origin_m_value = center.cli_config_get(key, m_hosts[0], m_ports[0])
    origin_s_value = center.cli_config_get(key, s_hosts[0], s_ports[0])
    logger.debug('config set: cluster-node-timeout 2000')
    RedisCliConfig().set(key, '2000', all=True)

    # cluster failover (with no option)
    logger.info(message.get('failover_on_deploy'))
    logger.debug(slaves_for_failover)
    try_count = 0
    while try_count < 10:
        try_count += 1
        success = True
        for slave_addr in slaves_for_failover:
            host, port = slave_addr.split(':')
            stdout = center.run_failover("{}:{}".format(host, port))
            logger.debug("failover {}:{} {}".format(host, port, stdout))
            if stdout != "ERR You should send CLUSTER FAILOVER to a slave":
                # In some cases, the cluster failover is not complete
                # even if stdout is OK
                # If redis changed to master completely,
                # return 'ERR You should send CLUSTER FAILOVER to a slave'
                success = False
        if success:
            break
        msg = message.get('retry').format(try_count=try_count)
        logger.info(msg)
        time.sleep(5)
    logger.debug('restore config: cluster-node-timeout')
    center.cli_config_set_all(key, origin_m_value, m_hosts, m_ports)
    center.cli_config_set_all(key, origin_s_value, s_hosts, s_ports)
    if not success:
        logger.error(message.get('error_redis_failover'))
        return

    # restart master (current slave)
    center.stop_current_nodes(master=False, slave=True)
    center.configure_redis(slave=False)
    center.sync_conf()
    center.start_current_nodes(master=False, slave=True)
    center.wait_until_all_redis_process_up()