def sync_canary(self, canaries=None):
    """
    Sync canary hosts

    :param canaries: Iterable of canary servers to sync
    """
    if not canaries:
        return
    sync_cmd = self._apache_sync_command(self.get_master_list())
    sync_cmd.append(socket.getfqdn())
    update_canaries = ssh.Job(
        canaries,
        user=self.config['ssh_user'],
        key=self.get_keyholder_key())
    update_canaries.command(sync_cmd)
    update_canaries.progress(
        log.reporter(
            'check-canaries',
            self.config['fancy_progress']
        )
    )
    succeeded, failed = update_canaries.run()
    if failed:
        self.get_logger().warning(
            '%d canaries had sync errors', failed)
        self.soft_errors = True

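# Illustrative sketch only: every sync helper here follows the same ssh.Job
# pattern -- build a job for a host list, set a command, attach a progress
# reporter, run, then check the failure count.  The import path and the
# standalone-function shape are assumptions for illustration; the
# 'example-job' label and the command are placeholders, not scap's own.
from scap import log, ssh  # assumed import path


def run_on_hosts(hosts, cmd, cfg, key=None):
    """Run cmd on every host via ssh.Job; return True if all hosts succeed."""
    job = ssh.Job(hosts, user=cfg['ssh_user'], key=key)
    job.shuffle()                  # randomize host order, as the sync jobs do
    job.command(cmd)
    job.progress(
        log.reporter('example-job', cfg['fancy_progress']))
    succeeded, failed = job.run()  # run() returns (success, failure) counts
    return failed == 0
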
def _after_cluster_sync(self):
    target_hosts = self._get_target_list()
    # Ask apaches to rebuild l10n CDB files
    with log.Timer('scap-cdb-rebuild', self.get_stats()):
        rebuild_cdbs = ssh.Job(
            target_hosts,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        rebuild_cdbs.shuffle()
        rebuild_cdbs.command(
            'sudo -u mwdeploy -n -- %s cdb-rebuild' %
            self.get_script_path())
        rebuild_cdbs.progress(
            log.reporter(
                'scap-cdb-rebuild',
                self.config['fancy_progress']))
        succeeded, failed = rebuild_cdbs.run()
        if failed:
            self.get_logger().warning(
                '%d hosts had scap-cdb-rebuild errors', failed)
            self.soft_errors = True

    # Update and sync wikiversions.php
    succeeded, failed = tasks.sync_wikiversions(
        target_hosts, self.config, key=self.get_keyholder_key())
    if failed:
        self.get_logger().warning(
            '%d hosts had sync_wikiversions errors', failed)
        self.soft_errors = True

    tasks.clear_message_blobs()
    self._invalidate_opcache()

def _after_cluster_sync(self): """ Need to remove cache dir manually after sync """ branch_cache = os.path.join(self.branch_deploy_dir, 'cache') target_hosts = self._get_target_list() cmd = ['/bin/rm', '-rf', branch_cache] with log.Timer('clean-remote-caches', self.get_stats()): remove_remote_dirs = ssh.Job( target_hosts, user=self.config['ssh_user'], key=self.get_keyholder_key() ) remove_remote_dirs.command(cmd) remove_remote_dirs.progress( log.reporter( 'clean-remote-caches', self.config['fancy_progress'] ) ) success, fail = remove_remote_dirs.run() if fail: self.get_logger().warning( '%d hosts failed to remove cache', fail ) self.soft_errors = True
def _after_cluster_sync(self):
    target_hosts = self._get_target_list()
    # Ask apaches to rebuild l10n CDB files
    with log.Timer('scap-cdb-rebuild', self.get_stats()):
        rebuild_cdbs = ssh.Job(
            target_hosts,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        rebuild_cdbs.shuffle()
        rebuild_cdbs.command(
            'sudo -u mwdeploy -n -- %s cdb-rebuild' %
            self.get_script_path())
        rebuild_cdbs.progress(
            log.reporter(
                'scap-cdb-rebuild',
                self.config['fancy_progress']))
        succeeded, failed = rebuild_cdbs.run()
        if failed:
            self.get_logger().warning(
                '%d hosts had scap-cdb-rebuild errors', failed)
            self.soft_errors = True

    # Update and sync wikiversions.php
    succeeded, failed = tasks.sync_wikiversions(
        target_hosts, self.config, key=self.get_keyholder_key())
    if failed:
        self.get_logger().warning(
            '%d hosts had sync_wikiversions errors', failed)
        self.soft_errors = True

    tasks.clear_message_blobs()
    self._invalidate_opcache()
    self._restart_php()

def sync_canary(self, canaries=None):
    """
    Sync canary hosts

    :param canaries: Iterable of canary servers to sync
    """
    if not canaries:
        return
    sync_cmd = self._apache_sync_command(self.get_master_list())

    # Go ahead and attempt to restart php for canaries
    if '--no-php-restart' in sync_cmd:
        sync_cmd.remove('--no-php-restart')

    sync_cmd.append(socket.getfqdn())
    update_canaries = ssh.Job(
        canaries,
        user=self.config['ssh_user'],
        key=self.get_keyholder_key())
    update_canaries.command(sync_cmd)
    update_canaries.progress(
        log.reporter('check-canaries', self.config['fancy_progress']))
    succeeded, failed = update_canaries.run()
    if failed:
        self.get_logger().warning('%d canaries had sync errors', failed)
        self.soft_errors = True

def _after_cluster_sync(self):
    # Rebuild l10n CDB files
    target_hosts = self._get_target_list()
    with log.Timer('scap-cdb-rebuild', self.get_stats()):
        rebuild_cdbs = ssh.Job(
            target_hosts,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        rebuild_cdbs.shuffle()
        cdb_cmd = 'sudo -u mwdeploy -n -- {} cdb-rebuild --version {}'
        cdb_cmd = cdb_cmd.format(
            self.get_script_path(),
            self.arguments.version
        )
        rebuild_cdbs.command(cdb_cmd)
        rebuild_cdbs.progress(
            log.reporter(
                'scap-cdb-rebuild',
                self.config['fancy_progress']))
        succeeded, failed = rebuild_cdbs.run()
        if failed:
            self.get_logger().warning(
                '%d hosts had scap-cdb-rebuild errors', failed)
            self.soft_errors = True

    tasks.clear_message_blobs()
    # Globally invalidate opcache. TODO: is this needed?
    self._invalidate_opcache(target_hosts)

def execute_remote(self, description, targets, command):
    """
    Run a command on the given targets, with timing and progress reporting.

    :param description: Label used for the timer and progress reporter
    :param targets: Iterable of hosts to run the command on
    :param command: Command to execute on each host
    """
    with log.Timer(description, self.get_stats()):
        clean_job = ssh.Job(targets, user=self.config['ssh_user'])
        clean_job.shuffle()
        clean_job.command(command)
        clean_job.progress(
            log.reporter(description, self.config['fancy_progress']))
        succeeded, failed = clean_job.run()
        if failed:
            self.get_logger().warning('%d had clean errors', failed)

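# Hypothetical usage sketch for execute_remote (commented out because it
# needs a class instance).  The description label, host source, and removal
# path are placeholder values, not taken from the scap source.
#
# self.execute_remote(
#     'clean-old-caches',                        # timer/reporter label
#     self._get_target_list(),                   # hosts to run the command on
#     ['/bin/rm', '-rf', '/tmp/example-cache'])  # command passed to ssh.Job
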
def execute_stage_on_group(self, stage, group, targets):
    """
    Execute a deploy stage for the given group targets.

    :param stage: deploy stage.
    :param group: deploy group.
    :param targets: group targets.
    :yields: (host, status)
    """
    logger = self.get_logger()
    deploy_local_cmd = [self.get_script_path(), 'deploy-local']
    batch_size = self._get_batch_size(stage)

    deploy_local_cmd.append('-v')
    deploy_local_cmd += ['--repo', self.config['git_repo']]

    if self.arguments.force:
        deploy_local_cmd.append('--force')

    deploy_local_cmd += ['-g', group, stage]
    deploy_local_cmd.append('--refresh-config')

    logger.debug('Running remote deploy cmd {}'.format(deploy_local_cmd))

    deploy_stage = ssh.Job(
        hosts=targets,
        user=self.config['ssh_user'],
        key=self.get_keyholder_key(),
        verbose=self.verbose)
    deploy_stage.output_handler = ssh.JSONOutputHandler
    deploy_stage.max_failure = self.MAX_FAILURES
    deploy_stage.command(deploy_local_cmd)
    display_name = self._get_stage_name(stage)
    progress_message = '{}: {} stage(s)'.format(self.repo, display_name)
    deploy_stage.progress(
        log.reporter(progress_message, self.config['fancy_progress']))

    failed = 0
    for host, status in deploy_stage.run_with_status(batch_size):
        if status != 0:
            failed += 1
        yield host, status

    if failed:
        logger.warning('%d targets had deploy errors', failed)

def sync_wikiversions(hosts, cfg, key=None):
    """
    Rebuild and sync wikiversions.php to the cluster.

    :param hosts: List of hosts to sync to
    :param cfg: Dict of global configuration values
    :param key: SSH key passed to ssh.Job (optional)
    """
    stats = log.Stats(cfg['statsd_host'], int(cfg['statsd_port']))
    with log.Timer('sync_wikiversions', stats):
        compile_wikiversions('stage', cfg)

        rsync = ssh.Job(hosts, user=cfg['ssh_user'], key=key).shuffle()
        rsync.command(
            'sudo -u mwdeploy -n -- /usr/bin/rsync -l '
            '%(master_rsync)s::common/wikiversions*.{json,php} '
            '%(deploy_dir)s' % cfg)
        return rsync.progress(
            log.reporter('sync_wikiversions', cfg['fancy_progress'])).run()

def execute_stage_on_group(self, stage, group, targets):
    """
    Execute a deploy stage for the given group targets.

    :param stage: deploy stage.
    :param group: deploy group.
    :param targets: group targets.
    :yields: (host, status)
    """
    logger = self.get_logger()
    deploy_local_cmd = [self.get_script_path(), 'deploy-local']
    batch_size = self._get_batch_size(stage)

    deploy_local_cmd.append('-v')
    deploy_local_cmd += ['--repo', self.config['git_repo']]

    if self.arguments.force:
        deploy_local_cmd.append('--force')

    deploy_local_cmd += ['-g', group, stage]
    deploy_local_cmd.append('--refresh-config')

    logger.debug('Running remote deploy cmd {}'.format(deploy_local_cmd))

    deploy_stage = ssh.Job(
        hosts=targets,
        user=self.config['ssh_user'],
        key=self.get_keyholder_key(),
        verbose=self.verbose)
    deploy_stage.output_handler = ssh.JSONOutputHandler
    deploy_stage.max_failure = self.MAX_FAILURES
    deploy_stage.command(deploy_local_cmd)
    display_name = self._get_stage_name(stage)
    progress_message = '{}: {} stage(s)'.format(self.repo, display_name)
    deploy_stage.progress(
        log.reporter(progress_message, self.config['fancy_progress']))

    failed = 0
    for host, status in deploy_stage.run_with_status(batch_size):
        if status != 0:
            failed += 1
        yield host, status

    if failed:
        logger.warning('%d targets had deploy errors', failed)

def sync_wikiversions(hosts, cfg, key=None):
    """
    Rebuild and sync wikiversions.php to the cluster.

    :param hosts: List of hosts to sync to
    :param cfg: Dict of global configuration values
    :param key: SSH key passed to ssh.Job (optional)
    """
    stats = log.Stats(cfg['statsd_host'], int(cfg['statsd_port']))
    with log.Timer('sync_wikiversions', stats):
        compile_wikiversions('stage', cfg)

        rsync = ssh.Job(hosts, user=cfg['ssh_user'], key=key).shuffle()
        rsync.command(
            'sudo -u mwdeploy -n -- /usr/bin/rsync -l '
            '%(master_rsync)s::common/wikiversions*.{json,php} '
            '%(deploy_dir)s' % cfg)
        return rsync.progress(
            log.reporter('sync_wikiversions', cfg['fancy_progress'])).run()

def main(self, *extra_args):
    self._check_sync_flag()
    full_target_list = self._get_target_list()

    hosts = self.arguments.host
    if hosts is None:
        hosts = self._get_canary_list()

    canaries = [node for node in hosts if node in full_target_list]
    if not canaries:
        errmsg = "host argument(s) did not match any valid targets."
        raise ValueError(errmsg)

    with lock.Lock(self.get_lock_file(), self.arguments.message):
        synchosts = ", ".join(canaries)
        self.get_logger().info("Syncing canaries: %s", synchosts)
        self._before_cluster_sync()
        self._sync_common()
        self._after_sync_common()
        self._sync_masters()

        sync_cmd = self._apache_sync_command(self.get_master_list())
        sync_cmd.append(socket.getfqdn())

        update_canaries = ssh.Job(
            canaries,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        update_canaries.command(sync_cmd)
        update_canaries.progress(
            log.reporter(
                'sync-canaries',
                self.config['fancy_progress']))
        _succeeded, failed = update_canaries.run()
        if failed:
            self.get_logger().warning(
                '%d canaries had sync errors', failed)
            self.soft_errors = True

def run_with_status(self, batch_size=DEFAULT_BATCH_SIZE):
    """
    Run the job, report progress, and yield host/status as execution
    completes.

    :yields: (host, status)
    :raises: RuntimeError if command has not been set
    """
    if not self._command:
        raise RuntimeError('Command must be provided')

    if not self._reporter:
        self._reporter = log.reporter(self._command)

    if self._hosts:
        self._reporter.expect(len(self._hosts))
        self._reporter.start()

        for host, status, ohandler in cluster_ssh(
                self._hosts, self._command, self._user, self._key,
                batch_size, self.max_failure, self.output_handler,
                self.verbose):
            if status == 0:
                self._reporter.add_success()
            else:
                self.get_logger().warning(
                    '%s on %s returned [%d]: %s',
                    self._command, host, status, ohandler.output)
                self._reporter.add_failure()

            yield host, status

        self._reporter.finish()
    else:
        self.get_logger().warning(
            'Job %s called with an empty host list.', self._command)

def master_only_cmd(self, timer, cmd):
    """
    Run a command on all master servers other than the one we are on.

    :param timer: String name to use in timer/logging
    :param cmd: List of command/parameters to be executed
    """
    masters = self.get_master_list()
    with log.Timer(timer, self.get_stats()):
        update_masters = ssh.Job(
            masters,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        update_masters.exclude_hosts([socket.getfqdn()])
        update_masters.command(cmd)
        update_masters.progress(
            log.reporter(timer, self.config['fancy_progress']))
        succeeded, failed = update_masters.run()
        if failed:
            self.get_logger().warning('%d masters had sync errors', failed)
            self.soft_errors = True

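# Hypothetical usage sketch for master_only_cmd (commented out because it
# needs a class instance); the timer label and the command list are made-up
# examples, not the actual scap invocations.
#
# self.master_only_cmd(
#     'update-masters',
#     [self.get_script_path(), 'pull'])
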
def main(self, *extra_args):
    self._check_sync_flag()
    full_target_list = self._get_target_list()

    hosts = self.arguments.host
    if hosts is None:
        hosts = self._get_canary_list()

    canaries = [node for node in hosts if node in full_target_list]
    if not canaries:
        errmsg = "host argument(s) did not match any valid targets."
        raise ValueError(errmsg)

    with lock.Lock(self.get_lock_file(), self.arguments.message):
        synchosts = ", ".join(canaries)
        self.get_logger().info("Syncing canaries: %s", synchosts)
        self._before_cluster_sync()
        self._sync_common()
        self._after_sync_common()
        self._sync_masters()

        sync_cmd = self._apache_sync_command(self.get_master_list())
        sync_cmd.append(socket.getfqdn())

        update_canaries = ssh.Job(
            canaries,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        update_canaries.command(sync_cmd)
        update_canaries.progress(
            log.reporter('sync-canaries', self.config['fancy_progress']))
        _succeeded, failed = update_canaries.run()
        if failed:
            self.get_logger().warning(
                '%d canaries had sync errors', failed)
            self.soft_errors = True

def master_only_cmd(self, timer, cmd):
    """
    Run a command on all master servers other than the one we are on.

    :param timer: String name to use in timer/logging
    :param cmd: List of command/parameters to be executed
    """
    masters = self.get_master_list()
    with log.Timer(timer, self.get_stats()):
        update_masters = ssh.Job(
            masters,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        update_masters.exclude_hosts([socket.getfqdn()])
        update_masters.command(cmd)
        update_masters.progress(
            log.reporter(timer, self.config['fancy_progress']))
        succeeded, failed = update_masters.run()
        if failed:
            self.get_logger().warning(
                '%d masters had sync errors', failed)
            self.soft_errors = True

def run_with_status(self, batch_size=DEFAULT_BATCH_SIZE):
    """
    Run the job, report progress, and yield host/status as execution
    completes.

    :yields: (host, status)
    :raises: RuntimeError if command has not been set
    """
    if not self._command:
        raise RuntimeError('Command must be provided')

    if not self._reporter:
        self._reporter = log.reporter(self._command)

    if self._hosts:
        self._reporter.expect(len(self._hosts))
        self._reporter.start()

        for host, status, ohandler in cluster_ssh(
                self._hosts, self._command, self._user, self._key,
                batch_size, self.max_failure, self.output_handler,
                self.verbose):
            if status == 0:
                self._reporter.add_success()
            else:
                self.get_logger().warning(
                    '%s on %s returned [%d]: %s',
                    self._command, host, status, ohandler.output)
                self._reporter.add_failure()

            yield host, status

        self._reporter.finish()
    else:
        self.get_logger().warning(
            'Job %s called with an empty host list.', self._command)

def _after_cluster_sync(self):
    # Rebuild l10n CDB files
    target_hosts = self._get_target_list()
    with log.Timer('scap-cdb-rebuild', self.get_stats()):
        rebuild_cdbs = ssh.Job(
            target_hosts,
            user=self.config['ssh_user'],
            key=self.get_keyholder_key())
        rebuild_cdbs.shuffle()
        cdb_cmd = 'sudo -u mwdeploy -n -- {} cdb-rebuild --version {}'
        cdb_cmd = cdb_cmd.format(
            self.get_script_path(),
            self.arguments.version)
        rebuild_cdbs.command(cdb_cmd)
        rebuild_cdbs.progress(
            log.reporter(
                'scap-cdb-rebuild',
                self.config['fancy_progress']))
        succeeded, failed = rebuild_cdbs.run()
        if failed:
            self.get_logger().warning(
                '%d hosts had scap-cdb-rebuild errors', failed)
            self.soft_errors = True

    tasks.clear_message_blobs()
    # Globally invalidate opcache. TODO: is this needed?
    self._invalidate_opcache(target_hosts)
    self._restart_php()

def main(self, *extra_args): """Perform a sync operation to the cluster.""" print(ansi.logo()) self._assert_auth_sock() with lock.Lock(self.get_lock_file(), self.arguments.message): self._check_sync_flag() if not self.arguments.force: self.get_logger().info( 'Checking for new runtime errors locally') self._check_fatals() else: self.get_logger().warning('check_fatals Skipped by --force') self._before_cluster_sync() self._sync_common() self._after_sync_common() self._sync_masters() full_target_list = self._get_target_list() if not self.arguments.force: canaries = [ node for node in self._get_canary_list() if node in full_target_list ] with log.Timer('sync-check-canaries', self.get_stats()) as timer: self.sync_canary(canaries) timer.mark('Canaries Synced') self._invalidate_opcache(canaries) self.canary_checks(canaries, timer) else: self.get_logger().warning('Canaries Skipped by --force') # Update proxies proxies = [ node for node in self._get_proxy_list() if node in full_target_list ] with log.Timer('sync-proxies', self.get_stats()): sync_cmd = self._apache_sync_command(self.get_master_list()) # Proxies should always use the current host as their sync # origin server. sync_cmd.append(socket.getfqdn()) update_proxies = ssh.Job(proxies, user=self.config['ssh_user'], key=self.get_keyholder_key()) update_proxies.command(sync_cmd) update_proxies.progress( log.reporter('sync-proxies', self.config['fancy_progress'])) succeeded, failed = update_proxies.run() if failed: self.get_logger().warning('%d proxies had sync errors', failed) self.soft_errors = True # Update apaches with log.Timer('sync-apaches', self.get_stats()): update_apaches = ssh.Job(full_target_list, user=self.config['ssh_user'], key=self.get_keyholder_key()) update_apaches.exclude_hosts(proxies) update_apaches.exclude_hosts(self.get_master_list()) if not self.arguments.force: update_apaches.exclude_hosts(canaries) update_apaches.shuffle() update_apaches.command(self._apache_sync_command(proxies)) update_apaches.progress( log.reporter('sync-apaches', self.config['fancy_progress'])) succeeded, failed = update_apaches.run() if failed: self.get_logger().warning('%d apaches had sync errors', failed) self.soft_errors = True self._after_cluster_sync() self._after_lock_release() if self.soft_errors: return 1 return 0
def main(self, *extra_args): """Perform a sync operation to the cluster.""" print(ansi.logo()) self._assert_auth_sock() with lock.Lock(self.get_lock_file(), self.arguments.message): self._check_sync_flag() if not self.arguments.force: self.get_logger().info( 'Checking for new runtime errors locally') self._check_fatals() else: self.get_logger().warning('check_fatals Skipped by --force') self._before_cluster_sync() self._sync_common() self._after_sync_common() self._sync_masters() full_target_list = self._get_target_list() if not self.arguments.force: canaries = [node for node in self._get_canary_list() if node in full_target_list] with log.Timer( 'sync-check-canaries', self.get_stats()) as timer: self.sync_canary(canaries) timer.mark('Canaries Synced') self._invalidate_opcache(canaries) self.canary_checks(canaries, timer) else: self.get_logger().warning('Canaries Skipped by --force') # Update proxies proxies = [node for node in self._get_proxy_list() if node in full_target_list] conftool_conf = self.config['conftool_config'] if proxies and conftool_conf: # Before we hammer the proxies, depool them self.get_logger().info('Depooling proxies') proxy_pooler = pooler.Pooler(conftool_conf, proxies) proxy_pooler.depool() with log.Timer('sync-proxies', self.get_stats()): sync_cmd = self._apache_sync_command(self.get_master_list()) # Proxies should always use the current host as their sync # origin server. sync_cmd.append(socket.getfqdn()) update_proxies = ssh.Job( proxies, user=self.config['ssh_user'], key=self.get_keyholder_key()) update_proxies.command(sync_cmd) update_proxies.progress( log.reporter( 'sync-proxies', self.config['fancy_progress'])) succeeded, failed = update_proxies.run() if failed: self.get_logger().warning( '%d proxies had sync errors', failed) self.soft_errors = True # Update apaches with log.Timer('sync-apaches', self.get_stats()): update_apaches = ssh.Job( full_target_list, user=self.config['ssh_user'], key=self.get_keyholder_key()) update_apaches.exclude_hosts(proxies) update_apaches.exclude_hosts(self.get_master_list()) if not self.arguments.force: update_apaches.exclude_hosts(canaries) update_apaches.shuffle() update_apaches.command(self._apache_sync_command(proxies)) update_apaches.progress( log.reporter( 'sync-apaches', self.config['fancy_progress'])) succeeded, failed = update_apaches.run() if failed: self.get_logger().warning( '%d apaches had sync errors', failed) self.soft_errors = True if proxies and conftool_conf: # Ok all done self.get_logger().info('Repooling proxies') proxy_pooler.pool() self._after_cluster_sync() self._after_lock_release() if self.soft_errors: return 1 return 0