コード例 #1
0
ファイル: init.py プロジェクト: nrc/tiup
    def _check_ip_list(self, ipList=None):
        """Validate a comma-separated list of IP addresses.

        Args:
            ipList: Comma-separated address string. When falsy,
                ``self.hosts`` is validated instead.

        Every entry rejected by ``utils.is_valid_ip`` is reported in a single
        fatal message, after which the process exits with status 1. Returns
        ``None`` when every entry is accepted.
        """
        candidates = ipList if ipList else self.hosts

        rejected = [
            addr for addr in candidates.split(',')
            if not utils.is_valid_ip(addr)
        ]
        if not rejected:
            return

        term.fatal('{} is invalid.'.format(','.join(rejected)))
        exit(1)
コード例 #2
0
ファイル: init.py プロジェクト: nrc/tiup
    def check_os_version(self, facts=None):
        """Abort when any host reports an OS major version lower than 7.

        Args:
            facts: Mapping with a ``'success'`` entry that maps each host to
                its gathered variables; the version is read from
                ``['ansible_facts']['ansible_distribution_version']``.

        When at least one host is too old, a fatal message and a table of
        ``IP`` / ``OS_Version`` pairs are printed and the process exits with
        status 1. Returns ``None`` otherwise.
        """
        _lower_version = []

        # ``items()`` works on both Python 2 and Python 3 dicts.
        for _host, _vars in facts['success'].items():
            # get system version
            _sysversion = str(
                _vars['ansible_facts']['ansible_distribution_version'])
            try:
                # Compare the major version numerically: a plain string
                # comparison ranks '10' below '7'.
                _too_old = int(_sysversion.split('.')[0]) < 7
            except ValueError:
                # Non-numeric version strings keep the previous lexical
                # comparison so their handling does not change.
                _too_old = _sysversion < '7'
            if _too_old:
                _lower_version.append([_host, _sysversion])

        if _lower_version:
            term.fatal('Some machine\'s OS version dosen\'t support.')
            # Column width: the longest host name, but never narrower than
            # the 'IP' header itself.
            _length = max(max(len(str(x[0])) for x in _lower_version),
                          len('IP'))
            term.normal('IP'.ljust(_length + 2) + 'OS_Version')
            for _node in _lower_version:
                term.normal('{}{}'.format(_node[0].ljust(_length + 2),
                                          _node[1]))
            exit(1)
コード例 #3
0
ファイル: scale.py プロジェクト: nrc/tiup
    def __delete_component(self,
                           config=None,
                           component=None,
                           pattern=None,
                           uuid=None):
        """Remove one node of ``component`` from the running cluster.

        Args:
            config: Mapping from pattern name to a list of node dicts; only
                read for ``'tikv'``, where each node provides ``uuid``,
                ``offline``, ``ip`` and ``port``.
            component: One of ``'pd'``, ``'tikv'``, ``'drainer'`` or
                ``'pump'``; any other value makes this a no-op.
            pattern: Key into ``config`` selecting the node list.
            uuid: Identifier of the node to delete.

        Failures raised as ``TiOPSException`` by the PD / store deletion
        calls are reported as fatal and terminate the process with status 1.
        """
        if component == 'pd':
            try:
                self._cluster.del_pd(uuid)
            except exceptions.TiOPSException as e:
                term.fatal(
                    'Unable to delete PD node from cluster: {}'.format(e))
                exit(1)

        if component == 'tikv':
            _tikv_info = None
            for _tikv_node in config[pattern]:
                if _tikv_node['uuid'] != uuid:
                    continue
                if _tikv_node['offline']:
                    # Node is already flagged offline; nothing to do.
                    return
                _tikv_info = _tikv_node

            if _tikv_info is None:
                # No configured node carries this uuid, so there is no
                # address to match against the cluster's stores. Previously
                # this fell through and indexed an empty string.
                return

            # The address is the same for every store, so build it once.
            _address = '{}:{}'.format(_tikv_info['ip'], _tikv_info['port'])
            for ctikv in self._tikv_stores['stores']:
                # check if node in cluster
                if _address != ctikv['store']['address']:
                    continue
                _store_id = ctikv['store']['id']

                # delete store through api
                try:
                    self._cluster.del_store(_store_id)
                except exceptions.TiOPSException as e:
                    term.fatal('Unable to delete store: {}'.format(e))
                    exit(1)

        if component == 'drainer':
            _binlog = modules.BinlogAPI(topology=self.topology)
            _binlog.delete_drainer(node_id=uuid)

        if component == 'pump':
            _binlog = modules.BinlogAPI(topology=self.topology)
            _binlog.delete_pump(node_id=uuid)
コード例 #4
0
ファイル: init.py プロジェクト: nrc/tiup
    def check_os_platform(self, facts=None):
        """Abort when any host's OS family is not RedHat.

        Args:
            facts: Mapping with a ``'success'`` entry that maps each host to
                its gathered variables; the family is read from
                ``['ansible_facts']['ansible_os_family']`` and compared
                case-insensitively with ``'redhat'``.

        When at least one host does not match, a fatal message and a table
        of ``IP`` / ``OS_Family`` pairs are printed and the process exits
        with status 1. Returns ``None`` otherwise.
        """
        _unsupport_os = []

        # get operation system platform
        # ``items()`` works on both Python 2 and Python 3 dicts, whereas
        # ``iteritems()`` exists only on Python 2.
        for _host, _vars in facts['success'].items():
            _platform = _vars['ansible_facts']['ansible_os_family']
            if _platform.lower() != 'redhat':
                _unsupport_os.append([_host, _platform])

        if _unsupport_os:
            term.fatal(
                'Some machine\'s OS is not support, Please use Redhat / CentOS.'
            )
            # Column width: the longest host name, but never narrower than
            # the 'IP' header itself.
            _length = max(max(len(str(x[0])) for x in _unsupport_os),
                          len('IP'))
            term.normal('IP'.ljust(_length + 2) + 'OS_Family')
            for _node in _unsupport_os:
                term.normal('{}{}'.format(_node[0].ljust(_length + 2),
                                          _node[1]))
            exit(1)
コード例 #5
0
ファイル: scale.py プロジェクト: nrc/tiup
    def _process(self, component=None, pattern=None, node=None, role=None):
        """Scale the cluster in by removing every node listed in ``self._diff``.

        Args:
            component: Ignored on entry; rebound for each service processed.
            pattern: Ignored on entry; rebound for each service processed.
            node: Not read by this method.
            role: Not read by this method.

        Raises:
            exceptions.TiOPSRuntimeError: If any PD node reports itself as
                unhealthy.

        The process exits with status 1 when the request would remove every
        PD node or every TiKV node.
        """
        # Collect every unhealthy node before failing so the message really
        # lists all of them rather than only the first one encountered.
        _unhealth_node = [
            _pd_node['name'] for _pd_node in self._cluster.status()
            if not _pd_node['health']
        ]
        if _unhealth_node:
            msg = 'Some pd node is unhealthy, maybe server stoppd or network unreachable, unhealthy node list: {}'.format(
                ','.join(_unhealth_node))
            term.fatal(msg)
            raise exceptions.TiOPSRuntimeError(msg, operation='scaleIn')

        _current_pd_num = len(self._pd_status)
        # ``self._tikv_stores`` is a mapping whose 'stores' entry holds the
        # store list (it is indexed that way when deleting a store), so the
        # node count is the length of that list, not of the mapping.
        _current_tikv_num = len(self._tikv_stores['stores'])

        if 'pd_servers' in self._diff and len(
                self._diff['pd_servers']) == _current_pd_num:
            term.fatal('Can not delete all pd node.')
            exit(1)

        if 'tikv_servers' in self._diff and len(
                self._diff['tikv_servers']) == _current_tikv_num:
            term.fatal('Can not delete all tikv node.')
            exit(1)

        term.info('Check ssh connection.')
        self.act.check_ssh_connection()

        # Services are walked in the reverse of ``service_group`` order.
        for service in self.topology.service_group[::-1]:
            component, pattern = self.check_exist(service, self._diff)
            if not component and not pattern:
                continue
            uuid = [x['uuid'] for x in self._diff[pattern]]
            term.normal('Delete {}, node list: {}'.format(
                component, ','.join(uuid)))
            for _uuid in uuid:
                self.__delete_component(self._diff, component, pattern, _uuid)
                # tikv / pump / drainer are only deleted via the cluster
                # APIs above; they are not stopped or destroyed here.
                if component not in ['tikv', 'pump', 'drainer']:
                    self.act.stop_component(component=component,
                                            pattern=pattern,
                                            node=_uuid)
                    self.act.destroy_component(component=component,
                                               pattern=pattern,
                                               node=_uuid)
                if component != 'blackbox_exporter':
                    self.topology.replace(self.topology.remove(_uuid)[0])
コード例 #6
0
def _run_guarded(func, catch_request_error=False):
    """Call ``func()`` and report TiOPS exceptions on the terminal.

    Args:
        func: Zero-argument callable wrapping the operation to run.
        catch_request_error: When true, ``TiOPSRequestError`` is reported
            with its ``msg``, ``url`` and ``code`` and the process exits
            with status 1. When false, no dedicated handler is installed
            for that exception type.

    Returns:
        The value returned by ``func()``, or ``None`` if a handler returned
        without exiting.
    """
    # An empty tuple in an ``except`` clause matches nothing, which lets the
    # request-error handler be switched on or off without changing the order
    # in which the handlers are tried.
    request_errors = (TiOPSRequestError,) if catch_request_error else ()
    try:
        return func()
    except TiOPSRuntimeError as e:
        tierror(e)
    except request_errors as e:
        msg = "{}, URL {} returned {}, please check the network and try again.".format(
            e.msg, e.url, e.code)
        term.error(msg)
        sys.exit(1)
    except TiOPSException as e:
        term.debug(traceback.format_exc())
        term.fatal(str(e))
        sys.exit(1)
    return None


def main(args=None):
    """Dispatch the command named by ``args.action``.

    Args:
        args: Parsed command-line namespace. ``action`` selects the command;
            other attributes (``node_id``, ``role``, ``topology``,
            ``cluster_name``) are read only by the commands that use them.

    TiOPS exceptions raised by a command are reported on the terminal; see
    ``_run_guarded`` for which of them terminate the process.
    """
    try:
        action = args.action
    except AttributeError:
        # Without an action there is nothing to dispatch; previously this
        # was swallowed and surfaced later as a NameError on ``action``.
        term.fatal('No action specified.')
        sys.exit(1)

    if action == 'version':
        print(term.plain(TiOPSVer()))
        sys.exit(0)

    if action == 'quickdeploy':
        term.warn(
            'The quick deploy mode is for demo and testing, do NOT use in production!'
        )

        # do init
        _init = init.Init(args)

        def _init_demo():
            _init.init(demo=True)
            _init.init_network(demo=True)
            _init.init_host(demo=True)

        _run_guarded(_init_demo)

        # do deploy
        topo = topology.Topology(args=args, merge=True)

        def _deploy_demo():
            op.OprDeploy(args, topo, demo=True).do()
            op.OprStart(args, topo, demo=True).do()
            tm.TUIModule(topo, args=args).display()

        _run_guarded(_deploy_demo, catch_request_error=True)
        return

    if action == 'bootstrap-local':
        _init = init.Init(args)
        _run_guarded(lambda: _init.init())
        return

    if action == 'bootstrap-ssh':
        _init = init.Init(args)
        _run_guarded(lambda: _init.init_network())
        return

    if action == 'bootstrap-host':
        _init = init.Init(args)
        _run_guarded(lambda: _init.init_host())
        return

    # 'deploy' and 'display' build their own topology below; every other
    # action loads the topology up front.
    if action not in ['deploy', 'display']:
        topo = _run_guarded(lambda: topology.Topology(args))

    # Actions whose only step is ``Opr(args, topo).do(node=..., role=...)``,
    # mapped to the name of the operation class in ``op``.
    _node_role_ops = {
        'stop': 'OprStop',
        'restart': 'OprRestart',
        'reload': 'OprReload',
        'exec': 'OprExec',
    }

    if action == 'display':
        _cluster_name = getattr(args, 'cluster_name', None)

        def _display():
            # With a cluster name show that cluster; otherwise ask the
            # display module for the list view.
            if _cluster_name:
                _topo, _list = topology.Topology(args), False
            else:
                _topo, _list = None, True
            tm.TUIModule(_topo, args=args).display(_list)

        _run_guarded(_display)
    elif action == 'deploy':
        topo = topology.Topology(args=args, merge=True)

        def _deploy():
            op.OprDeploy(args, topo).do()
            tm.TUIModule(topo, args=args).display()

        _run_guarded(_deploy, catch_request_error=True)
    elif action == 'start':

        def _start():
            op.OprStart(args, topo).do(node=args.node_id, role=args.role)
            tm.TUIModule(topo, args=args, status=True).display()

        _run_guarded(_start)
    elif action in _node_role_ops:
        _opr = getattr(op, _node_role_ops[action])
        _run_guarded(
            lambda: _opr(args, topo).do(node=args.node_id, role=args.role))
    elif action == 'upgrade':
        _run_guarded(
            lambda: op.OprUpgrade(args, topo).do(node=args.node_id,
                                                 role=args.role),
            catch_request_error=True)
    elif action == 'destroy':
        _run_guarded(lambda: op.OprDestroy(args, topo).do())
    elif action == 'edit-config':
        _run_guarded(lambda: Action(topo=topo).edit_file())
    elif action == 'scale-out':
        # The topology file is read outside the guarded call, as before.
        addTopo = utils.read_yaml(args.topology)
        _run_guarded(lambda: op.OprScaleOut(args, topo, addTopo).do())
    elif action == 'scale-in':
        _run_guarded(
            lambda: op.OprScaleIn(args, topo, args.node_id).do(
                node=args.node_id))
コード例 #7
0
    def _process(self, component=None, pattern=None, node=None, role=None):
        """Reload the selected nodes by redeploying and restarting them.

        Args:
            component: Ignored on entry; rebound for each service processed.
            pattern: Ignored on entry; rebound for each service processed.
            node: Node selector passed to ``self.topology.role_node``; when
                falsy, monitoring-type services are reloaded as a whole.
            role: Role selector passed to ``self.topology.role_node``.

        Raises:
            exceptions.TiOPSRuntimeError: If any PD node reports itself as
                unhealthy.
        """
        if node:
            term.notice('Reload specified node in cluster.')
        elif role:
            term.notice('Reload specified role in cluster.')
        else:
            term.notice('Reload TiDB cluster.')
        _topology = self.topology.role_node(roles=role, nodes=node)

        _cluster = modules.ClusterAPI(topology=self.topology)
        # Collect every unhealthy node before failing so the message really
        # lists all of them rather than only the first one encountered.
        _unhealth_node = [
            _pd_node['name'] for _pd_node in _cluster.status()
            if not _pd_node['health']
        ]
        if _unhealth_node:
            msg = 'Some pd node is unhealthy, maybe server stoppd or network unreachable, unhealthy node list: {}'.format(
                ','.join(_unhealth_node))
            term.fatal(msg)
            raise exceptions.TiOPSRuntimeError(msg, operation='reload')

        term.info('Check ssh connection.')
        self.act.check_ssh_connection()

        def _redeploy(**target):
            # Deploy, stop, then start the same target, in that order.
            self.act.deploy_component(**target)
            self.act.stop_component(**target)
            self.act.start_component(**target)

        # every time should only contain one item
        for service in self.topology.service_group:
            component, pattern = self.check_exist(service=service,
                                                  config=_topology)
            if not component and not pattern:
                continue
            # reload pd server, reload leader node finally
            if component == 'pd':
                _leader_uuid = _cluster.pd_leader()
                _leader = None
                _pd_list = []
                for _node in _topology[pattern]:
                    if _node['uuid'] == _leader_uuid:
                        _leader = _node
                    else:
                        _pd_list.append(_node)
                # The selection may not include the leader (for example when
                # specific nodes were requested); only queue it if present.
                if _leader is not None:
                    _pd_list.append(_leader)

                for _node in _pd_list:
                    _uuid = _node['uuid']
                    term.normal('Reload {}, node id: {}.'.format(
                        component, _uuid))
                    # The leader is queried again for each node rather than
                    # reusing the value read above.
                    if _uuid == _cluster.pd_leader():
                        _cluster.evict_pd_leader(uuid=_uuid)

                    _redeploy(component=component,
                              pattern=pattern,
                              node=_uuid)
                continue

            if pattern in [
                    'monitored_servers', 'monitoring_server', 'grafana_server',
                    'alertmanager_server'
            ]:
                if not node:
                    term.normal('Reload {}.'.format(component))
                    _redeploy(component=component, pattern=pattern)
                else:
                    _uuid = [x['uuid'] for x in _topology[pattern]]
                    term.normal('Reload {}, node list: {}.'.format(
                        component, ','.join(_uuid)))
                    _redeploy(component=component,
                              pattern=pattern,
                              node=','.join(_uuid))
                continue

            for _node in _topology[pattern]:
                _uuid = _node['uuid']
                _host = _node['ip']
                term.normal('Reload {}, node id: {}.'.format(component, _uuid))
                if pattern == 'tikv_servers':
                    _port = _node['port']
                    _cluster.evict_store_leaders(host=_host, port=_port)
                _redeploy(component=component, pattern=pattern, node=_uuid)

                if pattern == 'tikv_servers':
                    _cluster.remove_evict(host=_host, port=_port)