Example #1
File: base.py Project: nrc/tiup
    def check_tombstone(self, topology=None, args=None):
        if not topology:
            topology = self.topology
        if not args:
            args = self._args
        _remove_uuid = []
        _cluster = ClusterAPI(topology)
        _binlog = BinlogAPI(topology)

        if _cluster.tikv_stores() and _cluster.tikv_tombstone():
            # addresses of online tikv stores
            _online_list = [
                x['store']['address']
                for x in _cluster.tikv_stores()['stores']
            ]
            # addresses of tombstone tikv stores
            _tombstone_list = [
                x['store']['address']
                for x in _cluster.tikv_tombstone()['stores']
            ]

            # collect tombstone tikv nodes
            for _node in topology()['tikv_servers']:
                if not _node['offline']:
                    continue

                _address = '{}:{}'.format(_node['ip'], _node['port'])

                # if the node is still online, skip it
                if _address in _online_list:
                    continue
                # if the node is tombstone, delete it from the topology
                elif _address in _tombstone_list:
                    _remove_uuid.append(_node['uuid'])

        if _binlog.pump_status:
            # node IDs of online pump nodes
            _online_list = [
                x['nodeId']
                for x in _binlog.pump_status['status'].values()
                if x['state'] != 'offline'
            ]
            # node IDs of tombstone (offline) pump nodes
            _tombstone_list = [
                x['nodeId']
                for x in _binlog.pump_status['status'].values()
                if x['state'] == 'offline'
            ]

            # collect tombstone pump nodes
            for _node in topology()['pump_servers']:
                if not _node['offline']:
                    continue

                if _node['uuid'] in _online_list:
                    continue
                elif _node['uuid'] in _tombstone_list:
                    _remove_uuid.append(_node['uuid'])

            # node IDs of online / tombstone (offline) drainer nodes
            _online_list = [
                x['nodeId'] for x in _binlog.drainer_status
                if x['state'] != 'offline'
            ]
            _tombstone_list = [
                x['nodeId'] for x in _binlog.drainer_status
                if x['state'] == 'offline'
            ]

            # collect tombstone drainer nodes
            for _node in topology()['drainer_servers']:
                if not _node['offline']:
                    continue

                if _node['uuid'] in _online_list:
                    continue
                elif _node['uuid'] in _tombstone_list:
                    _remove_uuid.append(_node['uuid'])

        if not _remove_uuid:
            return

        _new_topo, _diff = topology.remove(','.join(_remove_uuid), delete=True)
        ans = ansibleapi.ANSRunner(user=topology.user,
                                   topology=_diff,
                                   tiargs=args)
        act = Action(ans=ans, topo=topology)
        for service in [{'drainer': 'drainer_servers'},
                        {'pump': 'pump_servers'},
                        {'tikv': 'tikv_servers'}]:
            component, pattern = self.check_exist(service, _diff)
            if not component and not pattern:
                continue
            act.stop_component(component=component, pattern=pattern)
            act.destroy_component(component=component, pattern=pattern)

        topology.replace(_new_topo)
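
The heart of check_tombstone is a membership filter: build the online and tombstone address lists once, then collect the UUIDs of offline nodes whose address shows up in the tombstone list. A minimal, self-contained sketch of that step with made-up data (independent of the tiup APIs):

# Self-contained sketch of the tombstone filter above, with made-up data:
stores = [{'store': {'address': '10.0.1.1:20160'}}]       # online store
tombstones = [{'store': {'address': '10.0.1.2:20160'}}]   # tombstone store
tikv_servers = [
    {'uuid': 'kv-1', 'ip': '10.0.1.1', 'port': 20160, 'offline': False},
    {'uuid': 'kv-2', 'ip': '10.0.1.2', 'port': 20160, 'offline': True},
]

online_list = [x['store']['address'] for x in stores]
tombstone_list = [x['store']['address'] for x in tombstones]

remove_uuid = [
    n['uuid'] for n in tikv_servers
    if n['offline']
    and '{}:{}'.format(n['ip'], n['port']) not in online_list
    and '{}:{}'.format(n['ip'], n['port']) in tombstone_list
]
assert remove_uuid == ['kv-2']   # only the offline, tombstoned node is pruned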
Example #2
File: scale.py Project: nrc/tiup
class OprScaleIn(OperationBase):
    def __init__(self, args=None, topology=None, node=None):
        if not node:
            msg = 'Node ID not specified.'
            term.error(msg)
            raise exceptions.TiOPSConfigError(msg)

        self._new_topo, self._diff = topology.remove(node)

        super(OprScaleIn, self).__init__(args, topology)
        self.act = Action(ans=self.ans, topo=self.topology)

    def _prepare(self, component=None, pattern=None, node=None, role=None):
        term.notice('Begin delete node for TiDB cluster.')
        self._cluster = modules.ClusterAPI(topology=self.topology)
        self._pd_status = self._cluster.status()
        self._tikv_stores = self._cluster.tikv_stores()

    def _process(self, component=None, pattern=None, node=None, role=None):
        _unhealthy_nodes = []
        for _pd_node in self._cluster.status():
            if not _pd_node['health']:
                _unhealthy_nodes.append(_pd_node['name'])
        # report all unhealthy PD nodes at once, then abort
        if _unhealthy_nodes:
            msg = 'Some PD nodes are unhealthy (server stopped or network unreachable), unhealthy node list: {}'.format(
                ','.join(_unhealthy_nodes))
            term.fatal(msg)
            raise exceptions.TiOPSRuntimeError(msg, operation='scaleIn')

        _current_pd_num = len(self._pd_status)
        _current_tikv_num = len(self._tikv_stores)

        if 'pd_servers' in self._diff and len(
                self._diff['pd_servers']) == _current_pd_num:
            term.fatal('Cannot delete all PD nodes.')
            exit(1)

        if 'tikv_servers' in self._diff and len(
                self._diff['tikv_servers']) == _current_tikv_num:
            term.fatal('Cannot delete all TiKV nodes.')
            exit(1)

        term.info('Check ssh connection.')
        self.act.check_ssh_connection()

        for service in self.topology.service_group[::-1]:
            component, pattern = self.check_exist(service, self._diff)
            if not component and not pattern:
                continue
            uuid = [x['uuid'] for x in self._diff[pattern]]
            term.normal('Delete {}, node list: {}'.format(
                component, ','.join(uuid)))
            for _uuid in uuid:
                self.__delete_component(self._diff, component, pattern, _uuid)
                if component not in ['tikv', 'pump', 'drainer']:
                    self.act.stop_component(component=component,
                                            pattern=pattern,
                                            node=_uuid)
                    self.act.destroy_component(component=component,
                                               pattern=pattern,
                                               node=_uuid)
                if component != 'blackbox_exporter':
                    self.topology.replace(self.topology.remove(_uuid)[0])

    def _post(self, component=None, pattern=None, node=None, role=None):
        ans = ansibleapi.ANSRunner(user=self.topology.user,
                                   topology=self.topology(),
                                   tiargs=self._args)
        act = Action(ans=ans, topo=self.topology)
        if 'pd_servers' in self._diff:
            act.deploy_component(component='pd', pattern='pd_servers')
            act.deploy_component(component='tikv', pattern='tikv_servers')
            act.deploy_component(component='tidb', pattern='tidb_servers')
            act.deploy_component(component='pump', pattern='pump_servers')
            act.deploy_component(component='drainer',
                                 pattern='drainer_servers')

        # self.deploy.deploy_component(component='prometheus', pattern='monitoring_server', ans=ans)
        # self.reload.do(component='prometheus', pattern='monitoring_server')

        term.notice('Finished scaling in.')

    def __delete_component(self,
                           config=None,
                           component=None,
                           pattern=None,
                           uuid=None):
        if component == 'pd':
            try:
                self._cluster.del_pd(uuid)
            except exceptions.TiOPSException as e:
                term.fatal(
                    'Unable to delete PD node from cluster: {}'.format(e))
                exit(1)

        if component == 'tikv':
            _tikv_info = None
            for _tikv_node in config[pattern]:
                if _tikv_node['uuid'] != uuid:
                    continue
                if _tikv_node['offline']:
                    return
                _tikv_info = _tikv_node
            # the node may already be gone from the config
            if not _tikv_info:
                return
            for ctikv in self._tikv_stores['stores']:
                # check if the node is in the cluster
                if '{}:{}'.format(
                        _tikv_info['ip'],
                        _tikv_info['port']) == ctikv['store']['address']:
                    _store_id = ctikv['store']['id']

                    # delete store through api
                    try:
                        self._cluster.del_store(_store_id)
                    except exceptions.TiOPSException as e:
                        term.fatal('Unable to delete store: {}'.format(e))
                        exit(1)

        if component == 'drainer':
            _binlog = modules.BinlogAPI(topology=self.topology)
            _binlog.delete_drainer(node_id=uuid)

        if component == 'pump':
            _binlog = modules.BinlogAPI(topology=self.topology)
            _binlog.delete_pump(node_id=uuid)
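
For orientation, a hypothetical driver for OprScaleIn; the Topology constructor, parsed_args, and a do() entry point on OperationBase are assumptions about nrc/tiup, not verified API:

# Hypothetical driver -- Topology(), parsed_args and do() are assumptions:
topo = Topology(cluster_name='demo-cluster')   # assumed constructor
op = OprScaleIn(args=parsed_args, topology=topo, node='a1b2c3d4')
op.do()  # assumed to dispatch _prepare -> _process -> _post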
Example #3
class OprDestroy(OperationBase):
    def __init__(self, args=None, topology=None):
        super(OprDestroy, self).__init__(args, topology)
        self.act = Action(ans=self.ans, topo=self.topology)

    def _prepare(self, component=None, pattern=None, node=None, role=None):
        term.warn('The TiDB cluster {} ({}) is going to be destroyed.'.format(
            self.topology.cluster_name, self.topology.version))
        rm_prompt = 'This operation will ' + term.warn_red('remove') \
                    + ' the TiDB cluster ' + term.highlight_red(self.topology.cluster_name) \
                    + '. It can NOT be undone. ' + term.yes_no() + ':'
        notice = term.input(rm_prompt)
        if notice.lower() not in ['y', 'yes']:
            term.notice('Terminating the destroy operation.')
            raise exceptions.TiOPSRuntimeError('Operation cancelled by user.')

    def _process(self, component=None, pattern=None, node=None, role=None):
        term.info('Check ssh connection.')
        self.act.check_ssh_connection()
        term.info('Stopping TiDB cluster.')
        for service in self.topology.service_group[::-1]:
            component, pattern = self.check_exist(service,
                                                  config=self.topology())
            if not component and not pattern:
                continue
            try:
                self.act.stop_component(component=component,
                                        pattern=pattern,
                                        node=node)
            except exceptions.TiOPSWarning as e:
                term.debug(str(e))

        for service in self.topology.service_group[::-1]:
            component, pattern = self.check_exist(service,
                                                  config=self.topology())
            if not component and not pattern:
                continue
            term.normal('{} is being destroyed.'.format(component))
            try:
                self.act.destroy_component(component=component,
                                           pattern=pattern,
                                           node=node)
            except exceptions.TiOPSWarning as e:
                term.debug(str(e))

        # remove deploy dir
        self.ans.run_model('shell',
                           'rm -rf {{ full_deploy_dir | cluster_dir }}',
                           become=True,
                           group='*')

        self.ans.run_model('shell',
                           'rm -rf {{ full_data_dir | cluster_dir }}',
                           become=True,
                           group='*')

    def _post(self, component=None, pattern=None, node=None, role=None):
        try:
            utils.remove_dir(utils.profile_path(self.topology.cluster_dir))
        except Exception as e:
            logging.warning(e)

        term.notice('TiDB cluster destroyed.')
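
Likewise, a hypothetical driver for OprDestroy, under the same assumed entry point:

# Hypothetical driver, same assumptions as the OprScaleIn sketch above:
op = OprDestroy(args=parsed_args, topology=topo)
op.do()  # prompts for confirmation, then stops and destroys the cluster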