def run(args, spicerack):
    """Required by Spicerack API.

    Switch the MediaWiki active datacenter: pool the DNS discovery records
    in the new DC, flip the master datacenter, depool the old DC, verify the
    resulting DNS records and wait out the short TTL.
    """
    post_process_args(args)
    logger.info('Switch MediaWiki active datacenter to %s', args.dc_to)

    dnsdisc_records = spicerack.discovery(*MEDIAWIKI_SERVICES)
    mediawiki = spicerack.mediawiki()

    # Pool the discovery records on the new DC first. This will NOT trigger
    # confd to change the DNS admin state, as doing so now would cause a
    # validation error.
    dnsdisc_records.pool(args.dc_to)

    # Flip the MediaWiki master datacenter, remembering when we started so
    # the propagation sleep below can be shortened accordingly.
    start_ts = time.time()
    mediawiki.set_master_datacenter(args.dc_to)

    # Depooling the old DC is the step that makes confd apply the change.
    dnsdisc_records.depool(args.dc_from)

    # Verify that the IP of each record matches the expected LVS service
    # record in the new DC. The -rw and -ro discovery names map to the same
    # svc record (so some records are checked twice, which is unnecessary but
    # harmless), and -php is stripped because parsoid-php has a conftool
    # entry but no DNS record of its own.
    for dnsdisc in MEDIAWIKI_SERVICES:
        svc_name = dnsdisc.replace('-rw', '').replace('-ro', '').replace('-php', '')
        expected_fqdn = '{name}.svc.{dc_to}.wmnet'.format(name=svc_name, dc_to=args.dc_to)
        dnsdisc_records.check_record(dnsdisc, expected_fqdn)

    # Sleep whatever remains of DNS_SHORT_TTL to let set_master_datacenter
    # propagate before the next step runs.
    remaining = DNS_SHORT_TTL - (time.time() - start_ts)
    if remaining > 0:
        logger.info('Sleeping %.3f seconds to reach the %d seconds mark', remaining, DNS_SHORT_TTL)
        time.sleep(remaining)
def run(args, spicerack):
    """Required by Spicerack API.

    Put all core database masters in the target datacenter back into
    read-write mode.
    """
    post_process_args(args)
    logger.info('Setting in read-write mode all the core DB masters in %s', args.dc_to)
    spicerack.mysql_legacy().set_core_masters_readwrite(args.dc_to)
def run(args, spicerack):
    """Required by Spicerack API.

    Disable Puppet on the MediaWiki maintenance hosts in both datacenters so
    that the switchover steps are not interfered with by Puppet runs.
    """
    post_process_args(args)
    logger.info('Disabling Puppet on MediaWiki maintenance hosts in %s and %s', args.dc_from, args.dc_to)
    command = 'disable-puppet "{message}"'.format(message=PUPPET_REASON)
    spicerack.remote().query('A:mw-maintenance').run_sync(command)
def run(args, spicerack):
    """Required by Spicerack API.

    Set MediaWiki back to read-write in the new active datacenter and
    announce on IRC when the read-only period ended.
    """
    post_process_args(args)
    logger.info('Set MediaWiki in read-write in %s', args.dc_to)
    # Flag the IRC announcement when this is only a live test.
    prefix = '[DRY-RUN] ' if args.live_test else ''
    spicerack.mediawiki().set_readwrite(args.dc_to)
    spicerack.irc_logger.info('%sMediaWiki read-only period ends at: %s', prefix, datetime.utcnow())
def run(args, spicerack):
    """Required by Spicerack API.

    Re-enable Puppet (running the agent once) on the MediaWiki maintenance
    hosts and verify that periodic jobs are enabled in both datacenters.
    """
    post_process_args(args)
    logger.info('Starting MediaWiki maintenance jobs in %s', args.dc_to)
    command = 'run-puppet-agent --enable "{message}"'.format(message=PUPPET_REASON)
    spicerack.remote().query('A:mw-maintenance').run_sync(command)

    # Timers must end up enabled in both DCs.
    mediawiki = spicerack.mediawiki()
    for datacenter in (args.dc_to, args.dc_from):
        mediawiki.check_periodic_jobs_enabled(datacenter)
def run(args, spicerack):
    """Required by Spicerack API.

    Restore the DNS discovery TTL to its normal value and remove the stale
    confd state files produced while the records were being switched.
    """
    post_process_args(args)
    logger.info('Restoring DNS Discovery TTL to 300 for records: %s', MEDIAWIKI_SERVICES)
    spicerack.discovery(*MEDIAWIKI_SERVICES).update_ttl(300)

    logger.info('Removing stale confd files generated when switching discovery records')
    cleanup_command = 'rm -fv /var/run/confd-template/.discovery-{{{records}}}.state*.err'.format(
        records=','.join(MEDIAWIKI_SERVICES))
    spicerack.remote().query('A:dns-auth').run_sync(cleanup_command)
def run(args, spicerack):
    """Required by Spicerack API.

    Invert the replication direction of the sessions Redis cluster: stop
    replicating into the new active DC, then replicate from it into the old.
    """
    post_process_args(args)
    sessions_cluster = spicerack.redis_cluster('sessions')

    logger.info('Stopping replication in %s for the sessions Redis cluster', args.dc_to)
    sessions_cluster.stop_replica(args.dc_to)

    logger.info('Starting replication %s => %s for the sessions Redis cluster', args.dc_to, args.dc_from)
    sessions_cluster.start_replica(args.dc_from, args.dc_to)
def run(args, spicerack):
    """Required by Spicerack API.

    Lower the DNS discovery TTL for the MediaWiki records, then wait for the
    previously-served TTL so that the old cached records expire everywhere.
    """
    post_process_args(args)
    logger.info('Reducing DNS Discovery TTL to %d for records: %s', DNS_SHORT_TTL, MEDIAWIKI_SERVICES)
    discovery = spicerack.discovery(*MEDIAWIKI_SERVICES)

    # Capture the highest TTL currently served before lowering it: that is
    # the longest the old records can remain cached.
    previous_ttl = max(record.ttl for record in discovery.resolve())
    discovery.update_ttl(DNS_SHORT_TTL)

    logger.info(
        'Sleeping for the old TTL (%d seconds) to allow the old records to expire...', previous_ttl)
    time.sleep(previous_ttl)
def run(args, spicerack):
    """Required by Spicerack API.

    Stop the MediaWiki periodic maintenance jobs. In a live test the jobs in
    the active DC are left running; otherwise both DCs are stopped.
    """
    post_process_args(args)
    if args.live_test:
        logger.info("Skipping disable of maintenance jobs in %s (active DC)", args.dc_to)
        datacenters = [args.dc_from]
    else:
        datacenters = [args.dc_from, args.dc_to]

    logger.info('Stopping MediaWiki maintenance jobs in %s', ', '.join(datacenters))
    for datacenter in datacenters:
        spicerack.mediawiki().stop_periodic_jobs(datacenter)
def run(args, spicerack):
    """Required by Spicerack API.

    Warm up the MediaWiki appserver caches in the target datacenter by
    repeatedly running the warmup script until its execution time converges.
    """
    post_process_args(args)
    if args.live_test:
        logger.info('Inverting DC to perform the warmup in %s (passive DC)', args.dc_from)
        datacenter = args.dc_from
    else:
        datacenter = args.dc_to

    ask_confirmation(
        'Are you sure to warmup caches in {dc}?'.format(dc=datacenter))

    warmup_dir = '/var/lib/mediawiki-cache-warmup'
    # urls-cluster runs only against appservers, since it is for shared
    # resources behind the servers themselves.
    warmup_commands = [
        "nodejs {dir}/warmup.js {dir}/urls-cluster.txt spread appservers.svc.{dc}.wmnet"
        .format(dir=warmup_dir, dc=datacenter)
    ]
    # urls-server runs against both the appserver and API clusters, since it
    # is for each individual server.
    warmup_commands.extend(
        "nodejs {dir}/warmup.js {dir}/urls-server.txt clone {cluster} {dc}"
        .format(dir=warmup_dir, dc=datacenter, cluster=cluster)
        for cluster in ["appserver", "api_appserver"])

    maintenance_host = spicerack.mediawiki().get_maintenance_host(datacenter)

    # It takes multiple executions of the warmup script to fully warm up the
    # appserver caches: the second run is faster than the first, and so on.
    # Empirically the caches are considered fully warm when this speedup
    # disappears, i.e. when the execution time converges and each attempt
    # takes about as long as the one before.
    logger.info('Running warmup script in %s.', datacenter)
    logger.info('The script will re-run until execution time converges.')

    previous_duration = datetime.timedelta.max
    attempt = 0
    while True:
        attempt += 1
        logger.info('Running warmup script, take %d', attempt)
        started_at = datetime.datetime.utcnow()
        maintenance_host.run_sync(*warmup_commands)
        duration = datetime.datetime.utcnow() - started_at
        logger.info('Warmup completed in %s', duration)
        # After the minimum number of iterations, stop as soon as a run takes
        # more than 95% as long as the previous one: keep looping while each
        # run is still faster than the last, with a 5% margin of error. Past
        # that point any further reduction is probably just noise.
        if attempt >= MINIMUM_ITERATIONS and duration > 0.95 * previous_duration:
            break
        previous_duration = duration

    logger.info('Execution time converged, warmup complete.')
def run(args, spicerack):
    """Required by Spicerack API.

    Set MediaWiki to read-only in both datacenters (skipping the active DC
    when this is a live test), announce it on IRC, and give in-flight
    requests a moment to complete.
    """
    post_process_args(args)
    logger.info('Set MediaWiki in read-only in %s and %s', args.dc_from, args.dc_to)
    mediawiki = spicerack.mediawiki()

    if args.live_test:
        logger.info('Skip setting MediaWiki read-only in %s', args.dc_to)
        prefix = '[DRY-RUN] '
    else:
        mediawiki.set_readonly(args.dc_to, args.ro_reason)
        prefix = ''

    spicerack.irc_logger.info('%sMediaWiki read-only period starts at: %s', prefix, datetime.utcnow())
    mediawiki.set_readonly(args.dc_from, args.ro_reason)

    logger.info('Sleeping 10s to allow in-flight requests to complete')
    time.sleep(10)
def run(args, spicerack):
    """Required by Spicerack API.

    Put the core DB masters of the old datacenter in read-only mode, verify
    the new datacenter's masters are read-only (unless live testing), and
    check that both sides are in sync.
    """
    post_process_args(args)
    logger.info(
        'Setting in read-only mode all the core DB masters in %s and verify those in %s',
        args.dc_from, args.dc_to)
    mysql = spicerack.mysql_legacy()

    if args.live_test:
        logger.info(
            'Skip verifying core DB masters in %s are in read-only mode', args.dc_to)
    else:
        mysql.verify_core_masters_readonly(args.dc_to, True)

    mysql.set_core_masters_readonly(args.dc_from)

    logger.info(
        'Check that all core masters in %s are in sync with the core masters in %s.',
        args.dc_to, args.dc_from)
    mysql.check_core_masters_in_sync(args.dc_from, args.dc_to)
def run(args, spicerack):
    """Required by Spicerack API.

    Restart the Envoy proxy on all jobrunner hosts of the old datacenter.
    """
    post_process_args(args)
    logger.info('Restarting Envoy on jobrunners in %s', args.dc_from)
    jobrunners_query = 'A:mw-jobrunner-{dc}'.format(dc=args.dc_from)
    spicerack.remote().query(jobrunners_query).run_sync('systemctl restart envoyproxy')
def run(args, spicerack):
    """Required by Spicerack API.

    Run the Puppet agent on every database master, in small batches so the
    masters are not all hit at once.
    """
    post_process_args(args)
    logger.info('Running Puppet on all DB masters')
    masters = spicerack.remote().query('A:db-role-master')
    masters.run_sync('run-puppet-agent', batch_size=5)