Example #1
0
File: init.py Project: nrc/tiup
    def check_systemd_version(self, info=None):
        """Warn about hosts whose installed systemd is older than 219-52.el7.

        ``info`` is the ansible callback result; this method reads
        ``info['success'][host]['results']``, a list of package records with
        the keys ``yumstate``, ``version`` and ``release``.

        Versions are compared by their numeric components rather than as
        plain strings, so that e.g. ``219-9.el7`` is recognised as older than
        ``219-52.el7`` and ``219-100.el7`` as newer.

        When at least one host is below the minimum, a table of the affected
        hosts is printed and the user is asked whether to continue; the
        process exits with status 1 if they decline.
        """
        import re

        def _numeric_key(text):
            # '219-52.el7' -> [219, 52, 7]; lists compare element by element.
            return [int(part) for part in re.findall(r'\d+', text)]

        _minimum_key = _numeric_key('219-52.el7')
        _lower_version = []

        for _host, _vars in info['success'].items():
            for systemd_info in _vars['results']:
                # only installed packages carry a meaningful version
                if systemd_info['yumstate'] != 'installed':
                    continue
                _version = '{}-{}'.format(systemd_info['version'],
                                          systemd_info['release'])
                # record every host running something older than 219-52.el7
                if _numeric_key(_version) < _minimum_key:
                    _lower_version.append([_host, _version])

        if not _lower_version:
            return

        term.warn(
            'Some machine\'s systemd service version lower than "219-52.el7".'
        )
        # pad the first column to the widest host name (or the header itself)
        _length = max(max([len(str(x[0])) for x in _lower_version]),
                      len('IP'))
        term.normal('IP'.ljust(_length + 2) + 'Systemd_Version')
        for _node in _lower_version:
            term.normal('{}{}'.format(_node[0].ljust(_length + 2),
                                      _node[1]))
        term.warn(
            'There are some memory bugs in lower systemd version(lower than "219-52.el7"). '
            'Refer to https://access.redhat.com/discussions/3536621.')
        if not utils.ticontinue():
            exit(1)
Example #2
0
    def _process(self, component=None, pattern=None, node=None, role=None):
        """Create directories on all nodes, then deploy every present service.

        The incoming ``component`` and ``pattern`` arguments are not used as
        filters; both values are obtained per service from
        ``self.check_exist``. Outside demo mode the machine configuration is
        checked before deploying and the tool set is deployed afterwards.
        """

        def _present_services():
            # Lazily yield (component, pattern) for each service group entry
            # that check_exist reports; entries with neither are skipped.
            for svc in self.topology.service_group:
                comp, patt = self.check_exist(svc, config=self.topology())
                if comp or patt:
                    yield comp, patt

        # create directory
        term.info('Create directory in all nodes.')
        for comp, patt in _present_services():
            self.act.create_directory(component=comp, pattern=patt)

        if not self.demo:
            self.act.check_machine_config()

        # start run deploy
        if self.demo:
            term.warn(
                'FirewallD is being disabled on deployment machines in quick deploy mode.'
            )
        for comp, patt in _present_services():
            term.normal('Deploy {}.'.format(comp))
            self.act.deploy_component(component=comp, pattern=patt)
            self.act.deploy_firewall(component=comp, pattern=patt)

        if not self.demo:
            self.act.deploy_tool()
Example #3
0
File: init.py Project: nrc/tiup
    def check_hostname(self, facts=None):
        """Warn when several hosts report the same hostname.

        ``facts`` is the ansible callback result; this method reads
        ``facts['success'][host]['ansible_facts']['ansible_hostname']``.

        When a hostname is shared by more than one host, a table of the
        conflicting hostnames and their hosts is printed and the user is
        asked whether to continue; the process exits with status 1 if they
        decline.
        """
        # hostname -> list of hosts reporting it
        _hostname_list = {}
        for _host, _vars in facts['success'].items():
            _hostname = _vars['ansible_facts']['ansible_hostname']
            _hostname_list.setdefault(_hostname, []).append(_host)

        # keep only the hostnames that are claimed by more than one host
        _conflicts = {
            _name: _hosts
            for _name, _hosts in _hostname_list.items()
            if len(_hosts) > 1
        }
        if not _conflicts:
            return

        term.warn("Some machine\'s hostname conflict.")
        # pad the first column to the widest hostname (or the header itself)
        _length = max(max([len(str(x)) for x in _conflicts]),
                      len('Hostname'))
        term.normal('Hostname'.ljust(_length + 2) + 'Hosts')
        for _hostname, _hosts in _conflicts.items():
            term.normal('{}{}'.format(_hostname.ljust(_length + 2),
                                      ', '.join(_hosts)))
        if not utils.ticontinue():
            exit(1)
Example #4
0
File: scale.py Project: nrc/tiup
 def __init__(self, args=None, topology=None, new_srvs=None):
     """Merge ``new_srvs`` into ``topology`` and set up a deploy operation.

     If the topology file already exists on disk, the tombstone check is
     run first. The merged topology replaces the current one in memory
     only (``write=False``) before the parent class is initialised with
     ``action='deploy'``.
     """
     topology_on_disk = os.path.exists(topology.topology_file)
     if topology_on_disk:
         status_notice = (
             'Check TiDB cluster {} status, it may take a few minutes.'
         ).format(topology.cluster_name)
         term.warn(status_notice)
         self.check_tombstone(topology, args)

     merged_topo, added = topology.add(new_srvs)
     self._new_topo = merged_topo
     self._diff = added
     topology.replace(self._new_topo, write=False)

     super(OprScaleOut, self).__init__(args, topology, action='deploy')
     self.act = Action(ans=self.ans, topo=self.topology)
Example #5
0
 def _prepare(self, component=None, pattern=None, node=None, role=None):
     """Ask the user to confirm destroying the cluster.

     Raises ``exceptions.TiOPSRuntimeError`` unless the answer is ``y`` or
     ``yes`` (case-insensitive).
     """
     term.warn('The TiDB cluster {} ({}) is going to be destroyed.'.format(
         self.topology.cluster_name, self.topology.version))

     # Build the confirmation prompt piece by piece.
     verb = term.warn_red('remove')
     cluster = term.highlight_red(self.topology.cluster_name)
     choices = term.yes_no()
     question = 'This operation will ' + verb \
                + ' the TiDB cluster ' + cluster \
                + '. It can NOT be undone. ' + choices + ':'

     answer = term.input(question)
     if answer.lower() in ('y', 'yes'):
         return

     term.notice('Terminate the destroy operation.')
     raise exceptions.TiOPSRuntimeError('Operation cancelled by user.')
Example #6
0
File: base.py Project: nrc/tiup
    def __init__(self, args=None, topology=None, demo=False, action=None):
        """Lock the profile, store the inputs and build the ansible runner.

        Outside demo mode, when the topology file exists and no ``action``
        is given, the tombstone check is run before the runner is created.
        """
        try:
            self._lock_profile()
        except NotImplementedError:
            # a missing _lock_profile implementation is tolerated
            pass
        except exceptions.TiOPSException:  # TODO: add more specific handlers
            raise

        self._args = args
        self.topology = topology

        # Same evaluation order as a chained `and`: demo flag first, then the
        # file check, then the action.
        status_check_needed = False
        if not demo:
            if os.path.exists(self.topology.topology_file):
                status_check_needed = not action

        if status_check_needed:
            notice = (
                'Check TiDB cluster {} status, it may take a few minutes.'
            ).format(self.topology.cluster_name)
            term.warn(notice)
            self.check_tombstone()

        runner_user = self.topology.user
        topo_data = self.topology()
        self.ans = ansibleapi.ANSRunner(user=runner_user,
                                        topology=topo_data,
                                        tiargs=self._args)
Example #7
0
 def run_playbook(self, playbook_path, extra_vars=None):
     """
     Run a playbook.

     A fresh ``PlayBookResultsCollector`` is stored on ``self.callback``
     and installed as the executor's stdout callback. Any exception is
     reported through ``term.warn`` and ``False`` is returned; on success
     nothing is returned explicitly.
     """
     try:
         self.callback = PlayBookResultsCollector()
         if extra_vars:
             self.variable_manager.extra_vars = extra_vars

         executor_kwargs = dict(
             playbooks=[playbook_path],
             inventory=self.inventory,
             variable_manager=self.variable_manager,
             loader=self.loader,
             options=self.options,
             passwords=self.password,
         )
         runner = PlaybookExecutor(**executor_kwargs)
         # route task results into our collector
         runner._tqm._stdout_callback = self.callback
         runner.run()
     except Exception as err:
         term.warn(str(err))
         return False
Example #8
0
def _run_guarded(func):
    """Call ``func()`` with the standard TiOPS error handling.

    ``TiOPSRuntimeError`` is passed to ``tierror``; any other
    ``TiOPSException`` is logged (traceback at debug level, message as
    fatal) and the process exits with status 1. Returns whatever ``func``
    returns when no exception is raised.
    """
    try:
        return func()
    except TiOPSRuntimeError as e:
        tierror(e)
    except TiOPSException as e:
        term.debug(traceback.format_exc())
        term.fatal(str(e))
        sys.exit(1)


def _run_guarded_with_request_errors(func):
    """Like ``_run_guarded`` but also reports ``TiOPSRequestError``.

    A ``TiOPSRequestError`` is reported with its message, URL and return
    code before exiting with status 1.
    """
    try:
        return func()
    except TiOPSRuntimeError as e:
        tierror(e)
    except TiOPSRequestError as e:
        msg = "{}, URL {} returned {}, please check the network and try again.".format(
            e.msg, e.url, e.code)
        term.error(msg)
        sys.exit(1)
    except TiOPSException as e:
        term.debug(traceback.format_exc())
        term.fatal(str(e))
        sys.exit(1)


def main(args=None):
    """Dispatch the command-line ``args.action`` to the matching operation.

    ``args`` is the parsed argument object; it must carry an ``action``
    attribute. Depending on the action, ``cluster_name``, ``node_id``,
    ``role`` and ``topology`` are read from it as well.

    Errors raised by the operations are handled by ``_run_guarded`` /
    ``_run_guarded_with_request_errors``.
    """
    try:
        action = args.action
    except AttributeError:
        # Without an action there is nothing to dispatch; previously this
        # path fell through and crashed on the unbound local `action`.
        term.fatal('No action specified.')
        sys.exit(1)

    if action == 'version':
        print(term.plain(TiOPSVer()))
        sys.exit(0)

    if action == 'quickdeploy':
        term.warn(
            'The quick deploy mode is for demo and testing, do NOT use in production!'
        )

        # do init
        _init = init.Init(args)

        def _bootstrap_demo():
            _init.init(demo=True)
            _init.init_network(demo=True)
            _init.init_host(demo=True)

        _run_guarded(_bootstrap_demo)

        # do deploy
        topo = topology.Topology(args=args, merge=True)

        def _deploy_demo():
            op.OprDeploy(args, topo, demo=True).do()
            op.OprStart(args, topo, demo=True).do()
            tm.TUIModule(topo, args=args).display()

        _run_guarded_with_request_errors(_deploy_demo)

    elif action == 'bootstrap-local':
        _init = init.Init(args)
        _run_guarded(_init.init)
    elif action == 'bootstrap-ssh':
        _init = init.Init(args)
        _run_guarded(_init.init_network)
    elif action == 'bootstrap-host':
        _init = init.Init(args)
        _run_guarded(_init.init_host)
    else:
        # 'deploy' and 'display' build their own topology below.
        if action not in ['deploy', 'display']:
            topo = _run_guarded(lambda: topology.Topology(args))

        if action == 'display':
            _cluster_name = getattr(args, 'cluster_name', None)

            def _display():
                # With a cluster name show that cluster, otherwise list all.
                if _cluster_name:
                    _topo, _list = topology.Topology(args), False
                else:
                    _topo, _list = None, True
                tm.TUIModule(_topo, args=args).display(_list)

            _run_guarded(_display)
        elif action == 'deploy':
            topo = topology.Topology(args=args, merge=True)

            def _deploy():
                op.OprDeploy(args, topo).do()
                tm.TUIModule(topo, args=args).display()

            _run_guarded_with_request_errors(_deploy)
        elif action == 'start':

            def _start():
                op.OprStart(args, topo).do(node=args.node_id, role=args.role)
                tm.TUIModule(topo, args=args, status=True).display()

            _run_guarded(_start)
        elif action == 'stop':
            _run_guarded(lambda: op.OprStop(args, topo).do(
                node=args.node_id, role=args.role))
        elif action == 'restart':
            _run_guarded(lambda: op.OprRestart(args, topo).do(
                node=args.node_id, role=args.role))
        elif action == 'reload':
            _run_guarded(lambda: op.OprReload(args, topo).do(
                node=args.node_id, role=args.role))
        elif action == 'upgrade':
            _run_guarded_with_request_errors(
                lambda: op.OprUpgrade(args, topo).do(node=args.node_id,
                                                     role=args.role))
        elif action == 'destroy':
            _run_guarded(lambda: op.OprDestroy(args, topo).do())
        elif action == 'edit-config':
            _run_guarded(lambda: Action(topo=topo).edit_file())
        elif action == 'scale-out':
            addTopo = utils.read_yaml(args.topology)
            _run_guarded(lambda: op.OprScaleOut(args, topo, addTopo).do())
        elif action == 'scale-in':
            _run_guarded(lambda: op.OprScaleIn(args, topo, args.node_id).do(
                node=args.node_id))
        elif action == 'exec':
            _run_guarded(lambda: op.OprExec(args, topo).do(
                node=args.node_id, role=args.role))
Example #9
0
    def run_model(self, module_name, module_args, become=False, register=None, with_items=None, group='*',
                  extra_vars=None, node=None):
        """
        Run a single module as an ansible ad-hoc task and collect the result.

        module_name: ansible module name
        module_args: ansible module args
        become: passed through as the task's ``become`` value
        register / with_items: added to the task when set (see below)
        group: host pattern used when neither ``self.ips`` nor ``node`` is set
        extra_vars: service name; exported to the play as the ``service``
            extra var and, for the monitoring services listed below, used to
            set a per-host ``service_name`` variable
        node: comma separated node list used as the host pattern

        Returns the collected result when no host failed or was unreachable.
        Raises ``exceptions.TiOPSRuntimeError`` (``tp='ansible'``) carrying
        the per-node ``failed`` / ``unreachable`` messages otherwise.
        """
        import traceback

        def _record(bucket, uuid, ip, text):
            # Messages are grouped per node uuid, then per host address.
            bucket.setdefault(uuid, {}).setdefault(ip, []).append(text)

        if self.topology:
            # service -> [inventory group, host variable holding its port]
            service_names = {'node_exporter': ['monitored_servers', 'node_exporter_port'],
                             'blackbox_exporter': ['monitored_servers', 'blackbox_exporter_port'],
                             'prometheus': ['monitoring_server', 'prometheus_port'],
                             'pushgateway': ['monitoring_server', 'pushgateway_port']}

            if extra_vars in service_names:
                group_name, port_var = service_names[extra_vars]
                groups = self.inventory.get_groups_dict()
                if group_name in groups:
                    for host in groups[group_name]:
                        hostname = self.inventory.get_host(hostname=host)
                        service_name = '{}-{}'.format(
                            extra_vars,
                            self.variable_manager.get_vars(host=hostname)[port_var])
                        self.variable_manager.set_host_variable(
                            host=hostname, varname='service_name', value=service_name)

            if self.cluster_name and extra_vars:
                self.variable_manager.extra_vars = {
                    'cluster_name': self.cluster_name, 'service': extra_vars}
            else:
                self.variable_manager.extra_vars = {
                    'cluster_name': self.cluster_name}

        # The optional keys are only added in the same combinations as
        # before: both set, or one set while the other is exactly None.
        task_spec = dict(action=dict(module=module_name, args=module_args),
                         become=become)
        if register and with_items:
            task_spec['register'] = register
            task_spec['with_items'] = with_items
        elif register is None and with_items:
            task_spec['with_items'] = with_items
        elif register and with_items is None:
            task_spec['register'] = register
        task = [task_spec]

        if node:
            node_list = node.split(',')
            if len(node_list) == 1:
                # trailing comma after a single node
                node_str = '{},'.format(node)
            else:
                node_str = ','.join(node_list)

        play_source = dict(
            name="Ansible Play",
            hosts=self.ips if self.ips else (node_str if node else group),
            gather_facts='no',
            tasks=task
        )

        play = Play().load(play_source, variable_manager=self.variable_manager, loader=self.loader)
        tqm = None
        self.callback = ModelResultsCollector()
        try:
            tqm = TaskQueueManager(
                inventory=self.inventory,
                variable_manager=self.variable_manager,
                loader=self.loader,
                options=self.options,
                passwords=self.password,
                stdout_callback="minimal",
            )
            tqm._stdout_callback = self.callback
            tqm.run(play)
        except Exception as e:
            term.warn(str(e))
            # format_exc() returns the traceback text; print_exc() returns
            # None and would log nothing useful here.
            term.debug(traceback.format_exc())
        finally:
            if tqm is not None:
                tqm.cleanup()

        result = self.get_model_result()
        failed = {}
        unreachable = {}

        # uuids of nodes flagged offline in the topology
        offline_list = []

        if self.topology:
            for grp in ['drainer_servers', 'pump_servers', 'tikv_servers']:
                if grp not in self.topology or not self.topology[grp]:
                    continue
                for _node in self.topology[grp]:
                    if _node['offline']:
                        offline_list.append(_node['uuid'])

        if result['success']:
            # a successful task that wrote to stderr is still reported
            for _uuid, _info in result['success'].items():
                _ip = _info['ansible_host']
                if _info.get('stderr'):
                    _record(failed, _uuid, _ip, _info['stderr'])

        if result['failed']:
            for _uuid, _info in result['failed'].items():
                _ip = _info['ansible_host']
                for _key in ('stderr', 'stdout'):
                    if _info.get(_key):
                        _record(failed, _uuid, _ip, _info[_key])

                _msg = _info.get('msg')
                if not _msg:
                    continue
                # messages matching these patterns are not reported
                if "'full_data_dir' is undefined" in _msg:
                    continue
                if re.search(r'Could not find.*firewalld', _msg):
                    continue
                # "port is not up" is not reported for offline nodes
                if _uuid in offline_list and re.search(r'the.*port.*is not up', _msg):
                    continue
                _record(failed, _uuid, _ip, _msg)

        if result['unreachable']:
            for _uuid, _info in result['unreachable'].items():
                _ip = _info['ansible_host']
                for _key in ('stderr', 'stdout', 'msg'):
                    if _info.get(_key):
                        _record(unreachable, _uuid, _ip, _info[_key])

        if not failed and not unreachable:
            return result

        msg = {}
        msg['failed'] = failed
        msg['unreachable'] = unreachable
        raise exceptions.TiOPSRuntimeError(msg, result, tp='ansible')