예제 #1
0
 def setup_method(self, *args):  # pylint: disable=arguments-differ
     """Initialize the fixtures shared by the tests in this class."""
     self.args = args
     self.handler = None
     self.target = Target(nodeset('node[1-2]'))
     self.commands = [
         Command('command1', ok_codes=[0, 100]),
         Command('command2', timeout=5),
     ]
     # Mocked worker that pretends to be running command1 on node1.
     worker = mock.MagicMock()
     worker.current_node = 'node1'
     worker.command = 'command1'
     worker.nodes = self.target.hosts
     self.worker = worker
    def first_run(self, has_systemd: bool = True) -> Iterator[Tuple]:
        """Perform the first Puppet run on a clean host without using custom wrappers.

        Arguments:
            has_systemd (bool, optional): if the host has systemd as init system.

        """
        commands = []
        # On systemd hosts make sure the puppet unit is stopped and any
        # previous failure state is cleared before kicking off the run.
        if has_systemd:
            commands.append("systemctl stop puppet.service")
            commands.append("systemctl reset-failed puppet.service || true")

        commands.append("puppet agent --enable")
        # The first agent run on a clean host can be very long: allow 3 hours.
        commands.append(Command(
            ("puppet agent --onetime --no-daemonize --verbose --no-splay --show_diff --ignorecache "
             "--no-usecacheonfailure"),
            timeout=10800,
        ))

        logger.info(
            "Starting first Puppet run (sit back, relax, and enjoy the wait)")
        results = self._remote_hosts.run_sync(*commands,
                                              print_output=False,
                                              print_progress_bars=False)
        logger.info("First Puppet run completed")
        return results
예제 #3
0
def check_patterns_in_repo(host_paths, patterns):
    """Git grep for all the given patterns in the given hosts and path and ask for confirmation if any is found.

    Arguments:
        host_paths (sequence): a sequence of 2-item tuples with the RemoteHost instance and the path of the
            repositories to check.
        patterns (sequence): a sequence of patterns to check.

    """
    # Build the grep template once: the doubled braces survive this first
    # .format() call as a literal '{path}' placeholder, filled in per repo below.
    grep_command = "git -C '{{path}}' grep -E '({patterns})'".format(
        patterns='|'.join(patterns))
    found = False
    for remote_host, path in host_paths:
        logger.info('Looking for matches in %s:%s', remote_host, path)
        # ok_codes=[] accepts any exit code: git grep exits non-zero on no match.
        results = remote_host.run_sync(
            Command(grep_command.format(path=path), ok_codes=[]))
        for _nodeset, _output in results:
            found = True

    if found:
        ask_confirmation(
            'Found match(es) in the Puppet or mediawiki-config repositories '
            '(see above), proceed anyway?')
    else:
        logger.info(
            'No matches found in the Puppet or mediawiki-config repositories')
예제 #4
0
    def _populate_puppetdb(self):
        """Run Puppet in noop mode to populate the exported resources in PuppetDB to downtime it on Icinga."""
        # ok_codes=[] accepts any exit status: a noop run may exit non-zero.
        self.remote_installer.run_sync(Command(
            'puppet agent -t --noop &> /dev/null', ok_codes=[]),
                                       print_progress_bars=False)
        self.host_actions.success(
            'Run Puppet in NOOP mode to populate exported resources in PuppetDB'
        )

        # The retry decorator re-invokes the function whenever it raises,
        # up to 50 attempts with linearly increasing backoff.
        @retry(tries=50, backoff_mode='linear')
        def poll_puppetdb():
            """Poll PuppetDB until we find the Nagios_host resource for the newly installed host."""
            # Resolve the actual PuppetDB backend behind the discovery record.
            puppetdb_host = self.dns.resolve_ptr(
                self.dns.resolve_ipv4('puppetdb-api.discovery.wmnet')[0])[0]
            # NOTE(review): requests.post is called without a timeout, so a
            # stuck PuppetDB could hang a retry attempt indefinitely — consider
            # adding timeout=... (verify against the retry decorator behavior).
            response = requests.post(
                f'https://{puppetdb_host}/pdb/query/v4/resources/Nagios_host/{self.host}'
            )
            json_response = response.json()
            if not json_response:  # PuppetDB returns empty list for non-matching results
                # SpicerackError here is the "not yet" signal that triggers a retry.
                raise SpicerackError(
                    f'Nagios_host resource with title {self.host} not found yet'
                )

            if len(json_response) != 1:
                raise RuntimeError(
                    f'Expected 1 result from PuppetDB got {len(json_response)}'
                )
            if json_response[0]['exported'] is not True:
                raise RuntimeError(
                    f'Expected the Nagios_host resource to be exported, got: {json_response[0]["exported"]}'
                )

        poll_puppetdb()
        self.host_actions.success(
            'Found Nagios_host resource for this host in PuppetDB')
    def get_status(self, service_re: str = "") -> HostsStatus:
        """Get the current status of the given hosts from Icinga.

        Arguments:
            service_re (str): if non-empty, the regular expression matching service names

        Returns:
            spicerack.icinga.HostsStatus: the instance that represents the status for the given hosts.

        Raises:
            IcingaError: if unable to get the status.
            IcingaStatusParseError: when failing to parse the status.
            IcingaStatusNotFoundError: if a host is not found in the Icinga status.
            re.error: if service_re is an invalid regular expression.

        """
        if service_re:
            # Validate the regex up front, discarding the compiled object, so
            # that a malformed pattern raises re.error before any remote call.
            re.compile(service_re)

        verbatim = " --verbatim-hosts" if self._verbatim_hosts else ""
        if service_re:
            services = " --services " + shlex.quote(service_re)
        else:
            services = ""
        # icinga-status exits with non-zero exit code on missing and
        # non-optimal hosts, hence ok_codes=[] to accept any exit code.
        command = Command(
            f'/usr/local/bin/icinga-status -j{verbatim}{services} "{self._target_hosts}"',
            ok_codes=[],
        )
        # icinga-status is a read-only script, hence is_safe=True.
        json_status = None
        for _, output in self._icinga_host.run_sync(
                command,
                is_safe=True,
                print_output=False,
                print_progress_bars=False
        ):
            json_status = output.message().decode()
            break

        if json_status is None:  # run_sync yielded no output at all
            raise IcingaError(
                "Unable to get the status for the given hosts, no output from icinga-status"
            )

        try:
            status = json.loads(json_status)
        except json.JSONDecodeError as e:
            raise IcingaStatusParseError(
                "Unable to parse Icinga status") from e

        # A null entry means Icinga does not know about that hostname.
        missing_hosts = [
            hostname for hostname, host_status in status.items()
            if host_status is None
        ]
        if missing_hosts:
            raise IcingaStatusNotFoundError(missing_hosts)

        return HostsStatus({
            hostname: HostStatus(**host_status)
            for hostname, host_status in status.items()
        })
    def _prepend_sudo(command: Union[str, Command]) -> Union[str, Command]:
        """Return the command with 'sudo -i ' prepended, preserving the input type."""
        if not isinstance(command, str):
            # Rebuild a Command keeping its timeout and accepted exit codes.
            return Command(
                "sudo -i " + command.command,
                timeout=command.timeout,
                ok_codes=command.ok_codes,
            )

        return "sudo -i " + command
예제 #7
0
    def setup_method(self, _, task_self):  # pylint: disable=arguments-differ
        """Initialize the fixtures shared by the tests in this class."""
        ssh_options = ['-o StrictHostKeyChecking=no', '-o BatchMode=yes']
        self.config = {'clustershell': {'ssh_options': ssh_options, 'fanout': 3}}

        self.target = Target(nodeset('node[1-2]'))
        self.worker = clustershell.worker_class(self.config, self.target)
        self.commands = [
            Command('command1'),
            Command('command2', ok_codes=[0, 100], timeout=5),
        ]
        self.task_self = task_self
        # Replace the module-level default handlers with mocks so the tests
        # can inspect how they are instantiated and driven.
        clustershell.DEFAULT_HANDLERS = {
            'sync': mock.MagicMock(spec_set=clustershell.SyncEventHandler),
            'async': mock.MagicMock(spec_set=clustershell.AsyncEventHandler)}

        # Assign the commands the worker will execute.
        self.worker.commands = self.commands
    def test_recheck_failed_services_optimal(self):
        """It should force a recheck of all services for the hosts on the Icinga server."""
        fixture = get_fixture_path("icinga", "status_with_services.json")
        with open(fixture) as f:
            set_mocked_icinga_host_output(self.mocked_icinga_host, f.read())

        self.icinga_hosts.recheck_failed_services()
        # assert_called_with checks the LAST call, which also ensures that the
        # recheck method did not issue an additional run_sync call afterwards.
        expected_command = Command(
            '/usr/local/bin/icinga-status -j "host1"', ok_codes=[])
        self.mocked_icinga_host.run_sync.assert_called_with(
            expected_command,
            is_safe=True,
            print_output=False,
            print_progress_bars=False,
        )
예제 #9
0
def find_kerberos_credentials(remote_host, decom_hosts):
    """Check if any host provided has a kerberos keytab stored on the KDC hosts."""
    cred_found = False
    logger.info('Looking for Kerberos credentials on KDC kadmin node.')
    for host in decom_hosts:
        # ok_codes=[] accepts any exit code: both commands exit non-zero when
        # nothing matches, which is the expected common case.
        cumin_commands = [
            Command(f'find {KERBEROS_KDC_KEYTAB_PATH} -name "{host}*"', ok_codes=[]),
            Command(f'/usr/local/sbin/manage_principals.py list "*{host}*"', ok_codes=[]),
        ]
        for _nodeset, _output in remote_host.run_sync(*cumin_commands):
            cred_found = True

    if cred_found:
        logger.info(
            'Please follow this guide to drop unused credentials: '
            'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Kerberos'
            '#Delete_Kerberos_principals_and_keytabs_when_a_host_is_decommissioned'
        )
    else:
        logger.info('No Kerberos credentials found.')
    def stop_periodic_jobs(self, datacenter: str) -> None:
        """Remove and ensure MediaWiki periodic jobs are disabled in the given DC.

        Arguments:
            datacenter (str): the name of the datacenter to work on.

        Raises:
            spicerack.remote.RemoteExecutionError: on failure.

        """
        targets = self.get_maintenance_host(datacenter)
        logger.info("Disabling MediaWiki periodic jobs in %s", datacenter)

        pkill_ok_codes = [0, 1]  # Accept both matches and no matches
        # Stop all systemd job units and timers
        targets.run_async("systemctl stop mediawiki_job_*")

        # Kill MediaWiki wrappers, in case someone has started one manually.
        # See modules/scap/manifests/scripts.pp in the Puppet repo.
        wrappers = (
            "foreachwiki",
            "foreachwikiindblist",
            "expanddblist",
            "mwscript",
            "mwscriptwikiset",
        )
        kill_wrappers = [
            Command(f'pkill --full "/usr/local/bin/{wrapper}"', ok_codes=pkill_ok_codes)
            for wrapper in wrappers
        ]
        targets.run_async(
            *kill_wrappers,
            # Kill all remaining PHP (but not php-fpm) processes for all users
            Command("killall -r 'php$'", ok_codes=[]),
            "sleep 5",
            # No more time to be gentle
            Command("killall -9 -r 'php$'", ok_codes=[]),
            "sleep 1",
        )
        self.check_periodic_jobs_disabled(datacenter)

        try:
            # Look for remaining PHP (but not php-fpm) processes. php-fpm is used for
            # serving noc.wikimedia.org, which is independent of periodic jobs
            targets.run_sync("! pgrep -c 'php$'", is_safe=True)
        except RemoteExecutionError:
            # We just log an error, don't actually report a failure to the system. We can live with this.
            logger.error(
                "Stray php processes still present on the %s maintenance host, please check",
                datacenter)
예제 #11
0
    def _httpbb(self):
        """Run the httpbb appserver tests against this host, if enabled via CLI args."""
        if not self.args.httpbb:
            return

        # The tests are executed from the active deployment server.
        deployment_host = self.remote.query(
            self.dns.resolve_cname('deployment.eqiad.wmnet'))
        command = Command(
            f'httpbb /srv/deployment/httpbb-tests/appserver/* --host={self.fqdn}',
            timeout=120)
        logger.info('Running httpbb tests')
        try:
            deployment_host.run_sync(command, print_progress_bars=False)
            self.host_actions.success('Run of httpbb tests was successful')
        except RemoteExecutionError:
            # We don't want to fail upon this failure, this is just a validation test for the user.
            self.host_actions.warning('//Failed to run httpbb tests//')
    def regenerate_certificate(self) -> Dict[str, str]:
        """Delete the local Puppet certificate and generate a new CSR.

        Returns:
            dict: a dictionary with hostnames as keys and CSR fingerprint as values.

        """
        logger.info("Deleting local Puppet certificate on %d hosts: %s",
                    len(self), self)
        self._remote_hosts.run_sync("rm -rfv /var/lib/puppet/ssl")

        fingerprints: Dict[str, str] = {}
        errors = []
        # The return codes for the cert generation are not well defined, so
        # success is determined by finding the fingerprint in the output
        # rather than by exit code (hence ok_codes=[]).
        command = Command("puppet agent --test --color=false", ok_codes=[])
        logger.info("Generating a new Puppet certificate on %d hosts: %s",
                    len(self), self)
        results = self._remote_hosts.run_sync(command, print_output=False)
        for nodeset, output in results:
            for line in output.message().decode().splitlines():
                if line.startswith("Error:"):
                    errors.append((nodeset, line))
                elif "Certificate Request fingerprint" in line:
                    # The fingerprint is everything after the second colon.
                    fingerprint = ":".join(line.split(":")[2:]).strip()
                    if fingerprint:
                        logger.info("Generated CSR for host %s: %s", nodeset,
                                    fingerprint)
                        for host in nodeset:
                            fingerprints[host] = fingerprint

        if len(fingerprints) != len(self):
            details = "\n".join(f"{nodeset}: {line}" for nodeset, line in errors)
            raise PuppetHostsError(
                "Unable to find CSR fingerprints for all hosts, detected errors are:\n"
                + details)

        return fingerprints
    def run(  # pylint: disable=too-many-arguments
        self,
        timeout: int = 300,
        enable_reason: Optional[Reason] = None,
        quiet: bool = False,
        failed_only: bool = False,
        force: bool = False,
        attempts: int = 0,
        batch_size: int = 10,
    ) -> None:
        """Run Puppet.

        Arguments:
            timeout (int, optional): the timeout in seconds to set in Cumin for the execution of the command.
            enable_reason (spicerack.administrative.Reason, optional): the reason to use to contextually re-enable
                Puppet if it was disabled.
            quiet (bool, optional): suppress Puppet output if True.
            failed_only (bool, optional): run Puppet only if the last run failed.
            force (bool, optional): forcely re-enable Puppet if it was disabled with ANY message.
            attempts (int, optional): override the default number of attempts waiting that an in-flight Puppet run
                completes before timing out as set in run-puppet-agent.
            batch_size (int, optional): how many concurrent Puppet runs to perform. The default value is tailored to
                not overload the Puppet masters.

        """
        args = []
        if enable_reason is not None:
            args.extend(("--enable", enable_reason.quoted()))
        # Boolean flags, appended in a fixed order.
        for flag, enabled in (
            ("--quiet", quiet),
            ("--failed-only", failed_only),
            ("--force", force),
        ):
            if enabled:
                args.append(flag)
        if attempts:
            args.extend(("--attempts", str(attempts)))

        args_string = " ".join(args)
        command = f"run-puppet-agent {args_string}"
        logger.info("Running Puppet with args %s on %d hosts: %s", args_string,
                    len(self), self)
        self._remote_hosts.run_sync(Command(command, timeout=timeout),
                                    batch_size=batch_size)
    def check_periodic_jobs_disabled(self, datacenter: str) -> None:
        """Check that MediaWiki periodic jobs are not enabled in the given DC.

        Arguments:
            datacenter (str): the name of the datacenter to work on.

        Raises:
            spicerack.remote.RemoteExecutionError: on failure.

        """
        # List all timers that start with mediawiki_job_, extract the unit
        # name with awk, then run `systemctl is-enabled` on each: the exit
        # codes are inverted so only disabled units pass, and 255 instructs
        # xargs to immediately abort on the first enabled unit found.
        check_command = (
            "systemctl list-units 'mediawiki_job_*' --no-legend "
            "| awk '{print $1}' "
            "| xargs -n 1 sh -c 'systemctl is-enabled $0 && exit 255 || exit 0'"
        )
        targets = self.get_maintenance_host(datacenter)
        # Read-only check, hence is_safe=True.
        targets.run_async(Command(check_command), is_safe=True)
예제 #15
0
def run(args, spicerack):  # pylint: disable=too-many-locals
    """Required by Spicerack API.

    Generates the DNS snippets from Netbox data on the active Netbox host,
    optionally lets the operator amend them manually, pushes the resulting
    commit, propagates it to the passive Netbox hosts and the authdns hosts,
    and finally deploys the updated zonefiles unless skipped.
    """
    remote = spicerack.remote()
    netbox_hostname = spicerack.dns().resolve_cname(NETBOX_DOMAIN)
    netbox_host = remote.query(netbox_hostname)
    netbox_hosts = remote.query(NETBOX_HOSTS_QUERY)
    reason = spicerack.admin_reason(args.message, task_id=args.task_id)
    # Always set an accessible CWD for runuser because the Python git module passes it to Popen
    base_command = ('cd /tmp && runuser -u {user} -- python3 '
                    '/srv/deployment/netbox-extras/dns/generate_dns_snippets.py').format(user=NETBOX_USER)

    extra_options = ''
    if args.emergency_manual_edit:
        # Keep the generated files around so the operator can edit them by hand.
        extra_options = '--keep-files '
    command_str = ('{base} commit {opts}--batch "{owner}: {msg}"').format(
        opts=extra_options, base=base_command, owner=reason.owner, msg=args.message)
    # NO_CHANGES_RETURN_CODE = 99 in generate_dns_snippets.py
    command = Command(command_str, ok_codes=[0, 99])

    logger.info('Generating the DNS records from Netbox data. It will take a couple of minutes.')
    results = netbox_host.run_sync(command, is_safe=True)
    # Extract the JSON metadata (commit sha1, path, no_changes flag) that the
    # generator script prints on a line prefixed with 'METADATA:'.
    metadata = {}
    for _, output in results:
        lines = output.message().decode()
        for line in lines.splitlines():
            if line.startswith('METADATA:'):
                metadata = json.loads(line.split(maxsplit=1)[1])
                break

    if spicerack.dry_run:
        if not metadata.get('no_changes', False):
            logger.info('Bailing out in DRY-RUN mode. Generated temporary files are available on %s:%s',
                        netbox_hostname, metadata.get('path'))
        return

    if args.emergency_manual_edit:
        logger.info('Generated temporary files are available on %s:%s', netbox_hostname, metadata.get('path'))
        logger.info('SSH there, as root modify any file, git stage them and run "git commit --amend" to commit them')
        logger.info('Then run "git log --pretty=oneline -1" and copy the new SHA1 of HEAD')
        # Amending rewrites the commit, so the operator must supply the new SHA1.
        metadata['sha1'] = input('Enter the new SHA1 of the commit to push: ')
        metadata['no_changes'] = False

    if metadata.get('no_changes', False):
        if args.force:
            # --force carries the SHA1 to re-deploy even without new changes.
            logger.info('No changes to deploy but --force set to %s, continuing.', args.force)
            sha1 = args.force
        else:
            logger.info('No changes to deploy.')
            return
    else:
        ask_confirmation('Have you checked that the diff is OK?')

        sha1 = metadata.get('sha1', '')
        if not sha1:
            raise RuntimeError('Unable to fetch SHA1 from commit metadata: {meta}'.format(meta=metadata))

        command = ('{base} push "{path}" "{sha1}"').format(base=base_command, path=metadata.get('path', ''), sha1=sha1)
        results = netbox_host.run_sync(command)

    # Keep the bare repositories on the passive Netbox hosts in sync.
    passive_netbox_hosts = remote.query(str(netbox_hosts.hosts - netbox_host.hosts))
    logger.info('Updating the Netbox passive copies of the repository on %s', passive_netbox_hosts)
    passive_netbox_hosts.run_sync('runuser -u {user} -- git -C "{path}" fetch {host} master:master'.format(
        path=NETBOX_BARE_REPO_PATH, user=NETBOX_USER, host=netbox_hostname))

    # Fast-forward the authdns checkouts to the pushed commit.
    authdns_hosts = remote.query(AUTHDNS_HOSTS_QUERY)
    logger.info('Updating the authdns copies of the repository on %s', authdns_hosts)
    authdns_hosts.run_sync(
        'runuser -u {user} -- git -C "{path}" fetch && git -C "{path}" merge --ff-only {sha1}'.format(
            path=AUTHDNS_NETBOX_CHECKOUT_PATH, user=AUTHDNS_USER, sha1=sha1))

    if args.skip_authdns_update:
        logger.warning(('ATTENTION! Skipping deploy of the updated zonefiles. The next manual authdns-update or '
                        'run of this cookbook will deploy the changes!'))
    else:
        logger.info('Deploying the updated zonefiles on %s', authdns_hosts)
        authdns_hosts.run_sync('cd {git} && utils/deploy-check.py -g {netbox} --deploy'.format(
            git=AUTHDNS_DNS_CHECKOUT_PATH, netbox=AUTHDNS_NETBOX_CHECKOUT_PATH))
예제 #16
0
def test_node_class_instantiation():
    """Default values should be set when a Node instance is created."""
    commands = [Command('command1'), Command('command2')]
    node = clustershell.Node('name', commands)
    assert isinstance(node.state, State)
    assert node.running_command_index == -1