def tearDownModule(): logging.debug("in tearDownModule") if utils.options.skip_teardown: return logging.debug("Tearing down the servers and setup") teardown_procs = [shard_0_master.teardown_mysql(), shard_0_replica.teardown_mysql(), shard_1_master.teardown_mysql(), shard_1_replica.teardown_mysql(), ] utils.wait_procs(teardown_procs, raise_on_error=False) utils.zk_teardown() shard_0_master.kill_vttablet() shard_0_replica.kill_vttablet() shard_1_master.kill_vttablet() shard_1_replica.kill_vttablet() utils.kill_sub_processes() utils.remove_tmp_files() shard_0_master.remove_tree() shard_0_replica.remove_tree() shard_1_master.remove_tree() shard_1_replica.remove_tree()
def tearDownModule():
  global vtgate_server
  logging.debug("in tearDownModule")
  if utils.options.skip_teardown:
    return

  logging.debug("Tearing down the servers and setup")
  utils.vtgate_kill(vtgate_server)
  tablet.kill_tablets([shard_0_master, shard_0_replica,
                       shard_1_master, shard_1_replica])

  teardown_procs = [shard_0_master.teardown_mysql(),
                    shard_0_replica.teardown_mysql(),
                    shard_1_master.teardown_mysql(),
                    shard_1_replica.teardown_mysql(),
                    ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
def setUpModule():
  try:
    environment.topo_server_setup()
    setup_procs = [
        shard_0_master.init_mysql(),
        shard_0_replica.init_mysql(),
        shard_0_ny_slave.init_mysql(),
        shard_1_master.init_mysql(),
        shard_1_slave1.init_mysql(),
        shard_1_slave2.init_mysql(),
        shard_1_ny_slave.init_mysql(),
        shard_1_rdonly.init_mysql(),
        shard_2_master.init_mysql(),
        shard_2_replica1.init_mysql(),
        shard_2_replica2.init_mysql(),
        shard_3_master.init_mysql(),
        shard_3_replica.init_mysql(),
        shard_3_rdonly.init_mysql(),
        ]
    utils.Vtctld().start()
    utils.wait_procs(setup_procs)
  except:
    tearDownModule()
    raise
def test_no_mysql_healthcheck(self): """This test starts a vttablet with no mysql port, while mysql is down. It makes sure vttablet will start properly and be unhealthy. Then we start mysql, and make sure vttablet becomes healthy. """ # we need replication to be enabled, so the slave tablet can be healthy. for t in tablet_62344, tablet_62044: t.create_db("vt_test_keyspace") pos = mysql_flavor().master_position(tablet_62344) changeMasterCmds = mysql_flavor().change_master_commands(utils.hostname, tablet_62344.mysql_port, pos) tablet_62044.mquery("", ["RESET MASTER", "RESET SLAVE"] + changeMasterCmds + ["START SLAVE"]) # now shutdown all mysqld shutdown_procs = [tablet_62344.shutdown_mysql(), tablet_62044.shutdown_mysql()] utils.wait_procs(shutdown_procs) # start the tablets, wait for them to be NOT_SERVING (mysqld not there) tablet_62344.init_tablet("master", "test_keyspace", "0") tablet_62044.init_tablet("spare", "test_keyspace", "0", include_mysql_port=False) for t in tablet_62344, tablet_62044: t.start_vttablet( wait_for_state=None, target_tablet_type="replica", full_mycnf_args=True, include_mysql_port=False ) for t in tablet_62344, tablet_62044: t.wait_for_vttablet_state("NOT_SERVING") self.check_healthz(t, False) # restart mysqld start_procs = [tablet_62344.start_mysql(), tablet_62044.start_mysql()] utils.wait_procs(start_procs) # the master should still be healthy utils.run_vtctl(["RunHealthCheck", tablet_62344.tablet_alias, "replica"], auto_log=True) self.check_healthz(tablet_62344, True) # the slave won't be healthy at first, as replication is not running utils.run_vtctl(["RunHealthCheck", tablet_62044.tablet_alias, "replica"], auto_log=True) self.check_healthz(tablet_62044, False) tablet_62044.wait_for_vttablet_state("NOT_SERVING") # restart replication tablet_62044.mquery("", ["START SLAVE"]) # wait for the tablet to become healthy and fix its mysql port utils.run_vtctl(["RunHealthCheck", tablet_62044.tablet_alias, "replica"], auto_log=True) tablet_62044.wait_for_vttablet_state("SERVING") self.check_healthz(tablet_62044, True) for t in tablet_62344, tablet_62044: # wait for mysql port to show up timeout = 10 while True: ti = utils.run_vtctl_json(["GetTablet", t.tablet_alias]) if "mysql" in ti["Portmap"]: break timeout = utils.wait_step("mysql port in tablet record", timeout) self.assertEqual(ti["Portmap"]["mysql"], t.mysql_port) # all done tablet.kill_tablets([tablet_62344, tablet_62044])
def tearDownModule():
  utils.required_teardown()
  if utils.options.skip_teardown:
    return

  if use_mysqlctld:
    # Try to terminate mysqlctld gracefully, so it kills its mysqld.
    for proc in setup_procs:
      utils.kill_sub_process(proc, soft=True)
    teardown_procs = setup_procs
  else:
    teardown_procs = [
        tablet_master.teardown_mysql(),
        tablet_replica1.teardown_mysql(),
        tablet_replica2.teardown_mysql(),
    ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_master.remove_tree()
  tablet_replica1.remove_tree()
  tablet_replica2.remove_tree()
def tearDownModule():
  if utils.options.skip_teardown:
    return

  tablet.kill_tablets([src_master, src_replica, src_rdonly1, src_rdonly2,
                       dst_master, dst_replica])

  teardown_procs = [
      src_master.teardown_mysql(),
      src_replica.teardown_mysql(),
      src_rdonly1.teardown_mysql(),
      src_rdonly2.teardown_mysql(),
      dst_master.teardown_mysql(),
      dst_replica.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  src_master.remove_tree()
  src_replica.remove_tree()
  src_rdonly1.remove_tree()
  src_rdonly2.remove_tree()
  dst_master.remove_tree()
  dst_replica.remove_tree()
def setUpModule():
  try:
    environment.topo_server().setup()

    setup_procs = [
        shard_master.init_mysql(),
        shard_replica.init_mysql(),
        shard_rdonly1.init_mysql(),
        shard_0_master.init_mysql(),
        shard_0_replica.init_mysql(),
        shard_0_rdonly1.init_mysql(),
        shard_1_master.init_mysql(),
        shard_1_replica.init_mysql(),
        shard_1_rdonly1.init_mysql(),
        ]
    utils.wait_procs(setup_procs)
    init_keyspace()
    logging.debug('environment set up with the following shards and tablets:')
    logging.debug('=========================================================')
    logging.debug('TABLETS: test_keyspace/0:\n%s', all_shard_tablets)
    logging.debug('TABLETS: test_keyspace/-80:\n%s', shard_0_tablets)
    logging.debug('TABLETS: test_keyspace/80-:\n%s', shard_1_tablets)
  except:
    tearDownModule()
    raise
def set_up(self): try: environment.topo_server_setup() utils.wait_procs([t.init_mysql() for t in self.tablets]) utils.run_vtctl(['CreateKeyspace', self.keyspace]) utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', self.keyspace, 'keyspace_id', 'uint64']) for t in self.tablets: t.init_tablet(t.type, keyspace=self.keyspace, shard=t.shard) utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True) for t in self.tablets: t.create_db('vt_' + self.keyspace) t.mquery(t.dbname, create_table) t.start_vttablet(wait_for_state=None) for t in self.tablets: t.wait_for_vttablet_state('SERVING') for t in self.tablets: if t.type == "master": utils.run_vtctl(['ReparentShard', '-force', self.keyspace+'/'+t.shard, t.tablet_alias], auto_log=True) utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True) self.vtgate_server, self.vtgate_port = utils.vtgate_start() vtgate_client = zkocc.ZkOccConnection("localhost:%u" % self.vtgate_port, "test_nj", 30.0) topology.read_topology(vtgate_client) except: self.shutdown() raise
def setUp(self): utils.wait_procs([self.tablet.init_mysql()]) self.tablet.mquery( '', ['create database vt_test_keyspace', 'set global read_only = off']) self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace') with open( os.path.join(self.vttop, 'test', 'test_data', 'test_schema.sql')) as f: self.clean_sqls = [] self.init_sqls = [] clean_mode = False for line in f: line = line.rstrip() if line == '# clean': clean_mode = True if line=='' or line.startswith('#'): continue if clean_mode: self.clean_sqls.append(line) else: self.init_sqls.append(line) try: for line in self.init_sqls: mcu.execute(line, {}) finally: mcu.close() customrules = os.path.join(environment.tmproot, 'customrules.json') schema_override = os.path.join(environment.tmproot, 'schema_override.json') self.create_schema_override(schema_override) table_acl_config = os.path.join( environment.vttop, 'test', 'test_data', 'table_acl_config.json') environment.topo_server().setup() self.create_customrules(customrules); utils.run_vtctl('CreateKeyspace -force test_keyspace') self.tablet.init_tablet('master', 'test_keyspace', '0') if environment.topo_server().flavor() == 'zookeeper': self.tablet.start_vttablet( memcache=self.memcache, zkcustomrules='/zk/test_ca/config/customrules/testrules', schema_override=schema_override, table_acl_config=table_acl_config, ) else: self.tablet.start_vttablet( memcache=self.memcache, filecustomrules=customrules, schema_override=schema_override, table_acl_config=table_acl_config, ) self.conn = self.connect() self.txlogger = utils.curl( self.url('/debug/txlog'), background=True, stdout=open(self.txlog_file, 'w')) self.txlog = framework.Tailer(self.txlog_file, flush=self.tablet.flush) self.log = framework.Tailer( os.path.join(environment.vtlogroot, 'vttablet.INFO'), flush=self.tablet.flush) self.querylog = Querylog(self)
def tearDownModule():
  utils.required_teardown()
  if utils.options.skip_teardown:
    return

  if utils.vtgate:
    utils.vtgate.kill()

  teardown_procs = [
      source_master.teardown_mysql(),
      source_replica.teardown_mysql(),
      source_rdonly1.teardown_mysql(),
      source_rdonly2.teardown_mysql(),
      destination_master.teardown_mysql(),
      destination_replica.teardown_mysql(),
      destination_rdonly1.teardown_mysql(),
      destination_rdonly2.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  source_master.remove_tree()
  source_replica.remove_tree()
  source_rdonly1.remove_tree()
  source_rdonly2.remove_tree()
  destination_master.remove_tree()
  destination_replica.remove_tree()
  destination_rdonly1.remove_tree()
  destination_rdonly2.remove_tree()
def tearDownModule():
  if utils.options.skip_teardown:
    return

  tablet.kill_tablets([shard_0_master, shard_0_replica,
                       shard_1_master, shard_1_replica])

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica.teardown_mysql(),
      unsharded_master.teardown_mysql(),
      unsharded_replica.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
  unsharded_master.remove_tree()
  unsharded_replica.remove_tree()
def setup():
  utils.zk_setup()

  # start mysql instance external to the test
  setup_procs = [master_tablet.init_mysql(),
                 replica_tablet.init_mysql()]
  utils.wait_procs(setup_procs)
  setup_tablets()
def teardown(self):
  all_tablets = self.tablet_map.values()
  tablet.kill_tablets(all_tablets)
  teardown_procs = [t.teardown_mysql() for t in all_tablets]
  utils.wait_procs(teardown_procs, raise_on_error=False)
  for t in all_tablets:
    t.remove_tree()
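# The module-level teardown functions in this collection all repeat the same
# per-tablet sequence: kill the vttablet, tear down its mysqld, clean up the
# shared environment, then remove the tablet tree. A consolidated helper along
# the following lines is only a sketch of that shared pattern -- the helper
# name and the idea of passing the tablet list in explicitly are assumptions,
# not part of the original test utilities.
def _teardown_all(all_tablets):
  # Kill the vttablet processes first so nothing is still talking to mysqld.
  tablet.kill_tablets(all_tablets)
  # Tear down every mysqld in parallel; errors are ignored because the test
  # environment is being discarded anyway.
  utils.wait_procs([t.teardown_mysql() for t in all_tablets],
                   raise_on_error=False)
  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()
  for t in all_tablets:
    t.remove_tree()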
def tearDownModule():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica1.teardown_mysql(),
      shard_0_replica2.teardown_mysql(),
      shard_0_rdonly.teardown_mysql(),
      shard_0_backup.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica1.teardown_mysql(),
      shard_2_master.teardown_mysql(),
      shard_2_replica1.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica1.remove_tree()
  shard_0_replica2.remove_tree()
  shard_0_rdonly.remove_tree()
  shard_0_backup.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica1.remove_tree()
  shard_2_master.remove_tree()
  shard_2_replica1.remove_tree()
def tearDownModule():
  if utils.options.skip_teardown:
    return

  if use_mysqlctld:
    # Try to terminate mysqlctld gracefully, so it kills its mysqld.
    for proc in setup_procs:
      utils.kill_sub_process(proc, soft=True)
    teardown_procs = setup_procs
  else:
    teardown_procs = [
        tablet_62344.teardown_mysql(),
        tablet_31981.teardown_mysql(),
    ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_62344.remove_tree()
  tablet_31981.remove_tree()

  path = os.path.join(environment.vtdataroot, 'snapshot')
  try:
    shutil.rmtree(path)
  except OSError as e:
    logging.debug("removing snapshot %s: %s", path, str(e))
def set_up(self): try: environment.topo_server().setup() utils.wait_procs([t.init_mysql() for t in self.tablets]) utils.run_vtctl(['CreateKeyspace', self.keyspace]) utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', self.keyspace, 'keyspace_id', 'uint64']) for t in self.tablets: t.init_tablet(t.type, keyspace=self.keyspace, shard=t.shard) utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True) for t in self.tablets: t.create_db('vt_' + self.keyspace) t.start_vttablet( wait_for_state=None, extra_args=['-queryserver-config-schema-reload-time', '1'], ) for t in self.tablets: t.wait_for_vttablet_state('SERVING') for t in self.tablets: if t.type == "master": utils.run_vtctl(['ReparentShard', '-force', self.keyspace+'/'+t.shard, t.tablet_alias], auto_log=True) utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True) if self.schema: utils.run_vtctl(['ApplySchemaKeyspace', '-simple', '-sql', self.schema, self.keyspace]) if self.vschema: if self.vschema[0] == '{': utils.run_vtctl(['ApplyVSchema', "-vschema", self.vschema]) else: utils.run_vtctl(['ApplyVSchema', "-vschema_file", self.vschema]) self.vtgate_server, self.vtgate_port = utils.vtgate_start(cache_ttl='500s', vtport=self.vtgate_port) vtgate_client = zkocc.ZkOccConnection("localhost:%u" % self.vtgate_port, "test_nj", 30.0) topology.read_topology(vtgate_client) except: self.shutdown() raise
def teardown():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      tablet_62344.teardown_mysql(),
      tablet_62044.teardown_mysql(),
      tablet_41983.teardown_mysql(),
      tablet_31981.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  utils.zk_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_62344.remove_tree()
  tablet_62044.remove_tree()
  tablet_41983.remove_tree()
  tablet_31981.remove_tree()

  path = os.path.join(utils.vtdataroot, 'snapshot')
  try:
    shutil.rmtree(path)
  except OSError as e:
    if utils.options.verbose:
      print >> sys.stderr, e, path
def teardown():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica.teardown_mysql(),
      shard_2_master.teardown_mysql(),
      shard_2_replica.teardown_mysql(),
      shard_3_master.teardown_mysql(),
      shard_3_replica.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  utils.zk_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
  shard_2_master.remove_tree()
  shard_2_replica.remove_tree()
  shard_3_master.remove_tree()
  shard_3_replica.remove_tree()
def tearDownModule():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      tablet_62344.teardown_mysql(),
      tablet_62044.teardown_mysql(),
      tablet_41983.teardown_mysql(),
      tablet_31981.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  utils.zk_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_62344.remove_tree()
  tablet_62044.remove_tree()
  tablet_41983.remove_tree()
  tablet_31981.remove_tree()

  path = os.path.join(utils.vtdataroot, 'snapshot')
  try:
    shutil.rmtree(path)
  except OSError as e:
    logging.debug("removing snapshot %s: %s", path, str(e))
def launch( self, keyspace, shards=None, replica_count=1, rdonly_count=0, ddls=None): """Launch test environment.""" if replica_count < 1: raise Exception('replica_count=%d < 1; tests now use semi-sync' ' and must have at least one replica' % replica_count) self.tablets = [] self.master_tablets = [] utils.run_vtctl(['CreateKeyspace', keyspace]) if not shards or shards[0] == '0': shards = ['0'] # Create tablets and start mysqld. procs = [] for shard in shards: procs.append(self._new_tablet(keyspace, shard, 'master', None)) for i in xrange(replica_count): procs.append(self._new_tablet(keyspace, shard, 'replica', i)) for i in xrange(rdonly_count): procs.append(self._new_tablet(keyspace, shard, 'rdonly', i)) utils.wait_procs(procs) # init tablets. for shard in shards: tablet_index = 0 self._init_tablet(keyspace, shard, 'master', None, tablet_index) tablet_index += 1 for i in xrange(replica_count): self._init_tablet(keyspace, shard, 'replica', i, tablet_index) tablet_index += 1 for i in xrange(rdonly_count): self._init_tablet(keyspace, shard, 'rdonly', i, tablet_index) tablet_index += 1 # Start tablets. for shard in shards: self._start_tablet(keyspace, shard, 'master', None) for i in xrange(replica_count): self._start_tablet(keyspace, shard, 'replica', i) for i in xrange(rdonly_count): self._start_tablet(keyspace, shard, 'rdonly', i) for t in self.tablets: t.wait_for_vttablet_state('NOT_SERVING') for t in self.master_tablets: utils.run_vtctl(['InitShardMaster', '-force', keyspace+'/'+t.shard, t.tablet_alias], auto_log=True) t.tablet_type = 'master' for t in self.tablets: t.wait_for_vttablet_state('SERVING') for ddl in ddls: fname = os.path.join(environment.tmproot, 'ddl.sql') with open(fname, 'w') as f: f.write(ddl) utils.run_vtctl(['ApplySchema', '-sql-file', fname, keyspace])
def setUp(self): environment.topo_server_setup() utils.wait_procs([self.tablet.init_mysql()]) self.tablet.mquery("", ["create database vt_test_keyspace", "set global read_only = off"]) self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace') self.clean_sqls = [] self.init_sqls = [] clean_mode = False with open(os.path.join(self.vttop, "test", "test_data", "test_schema.sql")) as f: for line in f: line = line.rstrip() if line == "# clean": clean_mode = True if line=='' or line.startswith("#"): continue if clean_mode: self.clean_sqls.append(line) else: self.init_sqls.append(line) try: for line in self.init_sqls: mcu.execute(line, {}) finally: mcu.close() utils.run_vtctl('CreateKeyspace -force test_keyspace') self.tablet.init_tablet('master', 'test_keyspace', '0') customrules = os.path.join(environment.tmproot, 'customrules.json') self.create_customrules(customrules) schema_override = os.path.join(environment.tmproot, 'schema_override.json') self.create_schema_override(schema_override) table_acl_config = os.path.join(environment.vttop, 'test', 'test_data', 'table_acl_config.json') self.tablet.start_vttablet( memcache=self.memcache, customrules=customrules, schema_override=schema_override, table_acl_config=table_acl_config, auth=True, ) # FIXME(szopa): This is necessary here only because of a bug that # makes the qs reload its config only after an action. utils.run_vtctl('Ping ' + self.tablet.tablet_alias) for i in range(30): try: self.conn = self.connect() self.txlogger = utils.curl(self.url('/debug/txlog'), background=True, stdout=open(self.txlog_file, 'w')) self.txlog = framework.Tailer(open(self.txlog_file), flush=self.tablet.flush) self.log = framework.Tailer(open(os.path.join(environment.vtlogroot, 'vttablet.INFO')), flush=self.tablet.flush) break except dbexceptions.OperationalError: if i == 29: raise time.sleep(1) self.postSetup()
def setUp(self): """Shuts down MySQL on the destination masters (in addition to the base setup)""" logging.debug("Starting base setup for MysqlDownDuringWorkerCopy") super(TestMysqlDownDuringWorkerCopy, self).setUp() logging.debug("Starting MysqlDownDuringWorkerCopy-specific setup") utils.wait_procs([shard_0_master.shutdown_mysql(), shard_1_master.shutdown_mysql()]) logging.debug("Finished MysqlDownDuringWorkerCopy-specific setup")
def tearDown(self): """Restarts the MySQL processes that were killed during the setup.""" logging.debug("Starting MysqlDownDuringWorkerCopy-specific tearDown") utils.wait_procs([shard_0_master.start_mysql(), shard_1_master.start_mysql()]) logging.debug("Finished MysqlDownDuringWorkerCopy-specific tearDown") super(TestMysqlDownDuringWorkerCopy, self).tearDown() logging.debug("Finished base tearDown for MysqlDownDuringWorkerCopy")
def tearDownModule():
  if utils.options.skip_teardown:
    return

  utils.wait_procs([t.teardown_mysql() for t in tablets],
                   raise_on_error=False)
  utils.kill_sub_processes()
  for t in tablets:
    t.remove_tree()
def setUpModule():
  try:
    environment.topo_server().setup()
    setup_procs = [t.init_mysql() for t in all_tablets]
    utils.wait_procs(setup_procs)
  except:
    tearDownModule()
    raise
def test_no_mysql_healthcheck(self): """This test starts a vttablet with no mysql port, while mysql is down. It makes sure vttablet will start properly and be unhealthy. Then we start mysql, and make sure vttablet becomes healthy. """ # we need replication to be enabled, so the slave tablet can be healthy. for t in tablet_62344, tablet_62044: t.create_db('vt_test_keyspace') pos = mysql_flavor().master_position(tablet_62344) changeMasterCmds = mysql_flavor().change_master_commands( utils.hostname, tablet_62344.mysql_port, pos) tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] + changeMasterCmds + ['START SLAVE']) # now shutdown all mysqld shutdown_procs = [ tablet_62344.shutdown_mysql(), tablet_62044.shutdown_mysql(), ] utils.wait_procs(shutdown_procs) # start the tablets, wait for them to be NOT_SERVING (mysqld not there) tablet_62344.init_tablet('master', 'test_keyspace', '0') tablet_62044.init_tablet('spare', 'test_keyspace', '0', include_mysql_port=False) for t in tablet_62344, tablet_62044: t.start_vttablet(wait_for_state=None, target_tablet_type='replica', full_mycnf_args=True, include_mysql_port=False) for t in tablet_62344, tablet_62044: t.wait_for_vttablet_state('NOT_SERVING') # restart mysqld start_procs = [ tablet_62344.start_mysql(), tablet_62044.start_mysql(), ] utils.wait_procs(start_procs) # wait for the tablets to become healthy and fix their mysql port for t in tablet_62344, tablet_62044: t.wait_for_vttablet_state('SERVING') for t in tablet_62344, tablet_62044: # wait for mysql port to show up timeout = 10 while True: ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias]) if 'mysql' in ti['Portmap']: break timeout = utils.wait_step('mysql port in tablet record', timeout) self.assertEqual(ti['Portmap']['mysql'], t.mysql_port) # all done tablet.kill_tablets([tablet_62344, tablet_62044])
def shutdown(self):
  tablet.kill_tablets(self.tablets)
  teardown_procs = [t.teardown_mysql() for t in self.tablets]
  utils.wait_procs(teardown_procs, raise_on_error=False)
  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()
  for t in self.tablets:
    t.remove_tree()
def _reset_tablet_dir(self, t):
  """Stop mysql, delete everything including tablet dir, restart mysql."""
  utils.wait_procs([t.teardown_mysql()])
  t.remove_tree()
  proc = t.init_mysql()
  if use_mysqlctld:
    # mysqlctld manages its own mysqld; wait for its control socket instead
    # of waiting on the init process itself.
    t.wait_for_mysqlctl_socket()
  else:
    utils.wait_procs([proc])
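# A short usage sketch for _reset_tablet_dir, assuming a test that wants to
# wipe a replica completely before exercising a restore path. The
# tablet_replica2 name and the wait_for_state argument mirror identifiers used
# elsewhere in this collection, but their combination here is an assumption,
# not code from the original module.
#
#   self._reset_tablet_dir(tablet_replica2)
#   tablet_replica2.start_vttablet(wait_for_state='SERVING')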
def setUpModule(): global vtgate_server global vtgate_port global vtgate_socket_file global master_start_position try: environment.topo_server_setup() # start mysql instance external to the test setup_procs = [master_tablet.init_mysql(), replica_tablet.init_mysql() ] utils.wait_procs(setup_procs) # Start up a master mysql and vttablet logging.debug("Setting up tablets") utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) master_tablet.init_tablet('master', 'test_keyspace', '0') replica_tablet.init_tablet('replica', 'test_keyspace', '0') utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0']) utils.validate_topology() master_tablet.create_db('vt_test_keyspace') master_tablet.create_db('other_database') replica_tablet.create_db('vt_test_keyspace') replica_tablet.create_db('other_database') utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace']) vtgate_socket_file = environment.tmproot + '/vtgate.sock' vtgate_server, vtgate_port = utils.vtgate_start(socket_file=vtgate_socket_file) master_tablet.start_vttablet() replica_tablet.start_vttablet() utils.run_vtctl(['SetReadWrite', master_tablet.tablet_alias]) utils.check_db_read_write(master_tablet.tablet_uid) for t in [master_tablet, replica_tablet]: t.reset_replication() utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/0', master_tablet.tablet_alias], auto_log=True) # reset counter so tests don't assert tablet.Tablet.tablets_running = 0 master_start_position = _get_master_current_position() master_tablet.mquery('vt_test_keyspace', _create_vt_insert_test) master_tablet.mquery('vt_test_keyspace', _create_vt_a) master_tablet.mquery('vt_test_keyspace', _create_vt_b) utils.run_vtctl(['ReloadSchema', master_tablet.tablet_alias]) utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias]) except: tearDownModule() raise
def setUp(self): utils.wait_procs([self.tablet.init_mysql()]) self.tablet.mquery("", ["create database vt_test_keyspace", "set global read_only = off"]) self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace') with open(os.path.join(self.vttop, "test", "test_data", "test_schema.sql")) as f: self.clean_sqls = [] self.init_sqls = [] clean_mode = False for line in f: line = line.rstrip() if line == "# clean": clean_mode = True if line=='' or line.startswith("#"): continue if clean_mode: self.clean_sqls.append(line) else: self.init_sqls.append(line) try: for line in self.init_sqls: mcu.execute(line, {}) finally: mcu.close() customrules = os.path.join(environment.tmproot, 'customrules.json') self.create_customrules(customrules) schema_override = os.path.join(environment.tmproot, 'schema_override.json') self.create_schema_override(schema_override) table_acl_config = os.path.join(environment.vttop, 'test', 'test_data', 'table_acl_config.json') if self.env == 'vttablet': environment.topo_server().setup() utils.run_vtctl('CreateKeyspace -force test_keyspace') self.tablet.init_tablet('master', 'test_keyspace', '0') self.tablet.start_vttablet( memcache=self.memcache, customrules=customrules, schema_override=schema_override, table_acl_config=table_acl_config, auth=True, ) else: self.tablet.start_vtocc( memcache=self.memcache, customrules=customrules, schema_override=schema_override, table_acl_config=table_acl_config, auth=True, keyspace="test_keyspace", shard="0", ) self.conn = self.connect() self.txlogger = utils.curl(self.url('/debug/txlog'), background=True, stdout=open(self.txlog_file, 'w')) self.txlog = framework.Tailer(self.txlog_file, flush=self.tablet.flush) self.log = framework.Tailer(os.path.join(environment.vtlogroot, '%s.INFO' % self.env), flush=self.tablet.flush) self.querylog = Querylog(self)
def tearDownModule():
  global vtgate_server
  global __tablets
  logging.debug("in tearDownModule")
  if utils.options.skip_teardown:
    return

  logging.debug("Tearing down the servers and setup")
  utils.vtgate_kill(vtgate_server)
  if __tablets is not None:
    tablet.kill_tablets(__tablets)
    teardown_procs = []
    for t in __tablets:
      teardown_procs.append(t.teardown_mysql())
    utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  if __tablets is not None:
    for t in __tablets:
      t.remove_tree()
def setUpModule():
  try:
    environment.topo_server().setup()

    # setup all processes
    setup_procs = [
        shard_0_master.init_mysql(),
        shard_0_slave.init_mysql(),
        ]
    utils.wait_procs(setup_procs)

    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    shard_0_master.init_tablet('replica', 'test_keyspace', '0')
    shard_0_slave.init_tablet('replica', 'test_keyspace', '0')

    # create databases so vttablet can start behaving normally
    shard_0_master.create_db('vt_test_keyspace')
    shard_0_slave.create_db('vt_test_keyspace')
  except:
    tearDownModule()
    raise
def setUpModule():
  try:
    environment.topo_server().setup()
    utils.Vtctld().start()

    setup_procs = [
        master_tablet.init_mysql(),
        replica_tablet.init_mysql(),
        ]
    utils.wait_procs(setup_procs)

    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    master_tablet.init_tablet('master', 'test_keyspace', '0')
    replica_tablet.init_tablet('replica', 'test_keyspace', '0')
    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    master_tablet.create_db('vt_test_keyspace')
    replica_tablet.create_db('vt_test_keyspace')
  except:
    tearDownModule()
    raise
def setUpModule():
  try:
    environment.topo_server().setup()

    # start mysql instance external to the test
    setup_procs = [master_tablet.init_mysql(),
                   replica_tablet.init_mysql()]
    utils.wait_procs(setup_procs)

    # Start up a master mysql and vttablet
    logging.debug("Setting up tablets")
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    master_tablet.init_tablet('master', 'test_keyspace', '0')
    replica_tablet.init_tablet('replica', 'test_keyspace', '0')
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
    utils.validate_topology()

    master_tablet.populate('vt_test_keyspace', create_vt_insert_test)
    replica_tablet.populate('vt_test_keyspace', create_vt_insert_test)

    utils.VtGate().start()

    master_tablet.start_vttablet(memcache=True, wait_for_state=None)
    replica_tablet.start_vttablet(memcache=True, wait_for_state=None)
    master_tablet.wait_for_vttablet_state('SERVING')
    replica_tablet.wait_for_vttablet_state('SERVING')

    utils.run_vtctl(
        ['InitShardMaster', 'test_keyspace/0', master_tablet.tablet_alias],
        auto_log=True)
    utils.validate_topology()

    # restart the replica tablet so the stats are reset
    replica_tablet.kill_vttablet()
    replica_tablet.start_vttablet(memcache=True)
  except:
    tearDownModule()
    raise
def setUpModule():
  try:
    environment.topo_server_setup()
    setup_procs = [
        shard_0_master.init_mysql(),
        shard_0_replica.init_mysql(),
        shard_0_ny_slave.init_mysql(),
        shard_1_master.init_mysql(),
        shard_1_slave1.init_mysql(),
        shard_1_slave2.init_mysql(),
        shard_1_ny_slave.init_mysql(),
        shard_1_rdonly.init_mysql(),
        shard_2_master.init_mysql(),
        shard_2_replica1.init_mysql(),
        shard_2_replica2.init_mysql(),
        shard_3_master.init_mysql(),
        shard_3_replica.init_mysql(),
        shard_3_rdonly.init_mysql(),
        ]
    utils.wait_procs(setup_procs)
  except:
    tearDownModule()
    raise
def tearDownModule():
  if utils.options.skip_teardown:
    return

  if use_mysqlctld:
    # Try to terminate mysqlctld gracefully, so it kills its mysqld.
    for proc in setup_procs:
      utils.kill_sub_process(proc, soft=True)
    teardown_procs = setup_procs
  else:
    teardown_procs = [
        tablet_master.teardown_mysql(),
        tablet_replica1.teardown_mysql(),
        tablet_replica2.teardown_mysql(),
    ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_master.remove_tree()
  tablet_replica1.remove_tree()
  tablet_replica2.remove_tree()
def setUpModule():
  global vtgate_server, vtgate_port
  logging.debug("in setUpModule")

  try:
    environment.topo_server().setup()
    setup_topology()

    # start mysql instance external to the test
    global __tablets
    setup_procs = []
    for tablet in __tablets:
      setup_procs.append(tablet.init_mysql())
    utils.wait_procs(setup_procs)
    create_db()
    start_tablets()
    vtgate_server, vtgate_port = utils.vtgate_start()
    # FIXME(shrutip): this should be removed once vtgate_cursor's
    # dependency on topology goes away.
    vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                          "test_nj", 30.0)
    topology.read_topology(vtgate_client)
  except:
    tearDownModule()
    raise
def tearDownModule():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      source_master.teardown_mysql(),
      source_replica.teardown_mysql(),
      source_rdonly.teardown_mysql(),
      destination_master.teardown_mysql(),
      destination_replica.teardown_mysql(),
      destination_rdonly.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  source_master.remove_tree()
  source_replica.remove_tree()
  source_rdonly.remove_tree()
  destination_master.remove_tree()
  destination_replica.remove_tree()
  destination_rdonly.remove_tree()
def tearDownModule():
  logging.debug('in tearDownModule')
  if utils.options.skip_teardown:
    return

  logging.debug('Tearing down the servers and setup')
  tablet.kill_tablets(
      [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica])

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
def tearDownModule():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_slave1.teardown_mysql(),
      shard_1_slave2.teardown_mysql(),
      shard_1_rdonly.teardown_mysql(),
      shard_2_master.teardown_mysql(),
      shard_2_replica1.teardown_mysql(),
      shard_2_replica2.teardown_mysql(),
      shard_3_master.teardown_mysql(),
      shard_3_replica.teardown_mysql(),
      shard_3_rdonly.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_slave1.remove_tree()
  shard_1_slave2.remove_tree()
  shard_1_rdonly.remove_tree()
  shard_2_master.remove_tree()
  shard_2_replica1.remove_tree()
  shard_2_replica2.remove_tree()
  shard_3_master.remove_tree()
  shard_3_replica.remove_tree()
  shard_3_rdonly.remove_tree()
def tearDownModule():
  if utils.options.skip_teardown:
    return
  global vtgate_server
  utils.vtgate_kill(vtgate_server)
  tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                       shard_1_master, shard_1_replica, shard_1_rdonly])

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_0_rdonly.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica.teardown_mysql(),
      shard_1_rdonly.teardown_mysql(),
      unsharded_master.teardown_mysql(),
      unsharded_replica.teardown_mysql(),
      unsharded_rdonly.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_0_rdonly.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
  shard_1_rdonly.remove_tree()
  unsharded_master.remove_tree()
  unsharded_replica.remove_tree()
  unsharded_rdonly.remove_tree()
def setUpModule():
  try:
    environment.topo_server().setup()

    # start mysql instance external to the test
    utils.wait_procs([t.init_mysql() for t in all_tablets])

    # start a vtctld so the vtctl insert commands are just RPCs, not forks
    utils.Vtctld().start()

    # Start up a master mysql and vttablet
    logging.debug('Setting up tablets')
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    master_tablet.init_tablet('master', 'test_keyspace', '0')
    replica_tablet.init_tablet('replica', 'test_keyspace', '0')
    replica2_tablet.init_tablet('replica', 'test_keyspace', '0')
    utils.validate_topology()

    for t in all_tablets:
      t.populate('vt_test_keyspace', create_vt_insert_test)
    for t in all_tablets:
      t.start_vttablet(memcache=True, wait_for_state=None)
    for t in all_tablets:
      t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/0',
                     master_tablet.tablet_alias], auto_log=True)
    utils.validate_topology()

    # restart the replica tablet so the stats are reset
    replica_tablet.kill_vttablet()
    replica_tablet.start_vttablet(memcache=True)
  except:
    tearDownModule()
    raise
def _init_mysql(tablets):
  setup_procs = []
  for t in tablets:
    setup_procs.append(t.init_mysql())
  utils.wait_procs(setup_procs)
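# Minimal usage sketch for _init_mysql: a setUpModule can hand it the module's
# tablet list and rely on tearDownModule for cleanup if anything fails. The
# tablet names below are hypothetical placeholders, not definitions from the
# original module.
#
#   def setUpModule():
#     try:
#       environment.topo_server().setup()
#       _init_mysql([src_master, src_replica, dst_master, dst_replica])
#     except:
#       tearDownModule()
#       raise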
def setUpModule(): global master_start_position try: environment.topo_server().setup() # start mysql instance external to the test setup_procs = [master_tablet.init_mysql(), replica_tablet.init_mysql()] utils.wait_procs(setup_procs) # start a vtctld so the vtctl insert commands are just RPCs, not forks utils.Vtctld().start() # Start up a master mysql and vttablet logging.debug('Setting up tablets') utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) master_tablet.init_tablet('master', 'test_keyspace', '0', tablet_index=0) replica_tablet.init_tablet('replica', 'test_keyspace', '0', tablet_index=1) utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) utils.validate_topology() master_tablet.create_db('vt_test_keyspace') master_tablet.create_db('other_database') replica_tablet.create_db('vt_test_keyspace') replica_tablet.create_db('other_database') master_tablet.start_vttablet(wait_for_state=None) replica_tablet.start_vttablet(wait_for_state=None) master_tablet.wait_for_vttablet_state('SERVING') replica_tablet.wait_for_vttablet_state('NOT_SERVING') for t in [master_tablet, replica_tablet]: t.reset_replication() utils.run_vtctl( ['InitShardMaster', 'test_keyspace/0', master_tablet.tablet_alias], auto_log=True) utils.wait_for_tablet_type(replica_tablet.tablet_alias, 'replica') master_tablet.wait_for_vttablet_state('SERVING') replica_tablet.wait_for_vttablet_state('SERVING') # reset counter so tests don't assert tablet.Tablet.tablets_running = 0 master_start_position = _get_master_current_position() master_tablet.mquery('vt_test_keyspace', _create_vt_insert_test) master_tablet.mquery('vt_test_keyspace', _create_vt_a) master_tablet.mquery('vt_test_keyspace', _create_vt_b) utils.run_vtctl(['ReloadSchema', master_tablet.tablet_alias]) utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias]) utils.run_vtctl(['RebuildVSchemaGraph']) utils.VtGate().start(tablets=[master_tablet, replica_tablet]) utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1) utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1) # Wait for the master and slave tablet's ReloadSchema to have worked. # Note we don't specify a keyspace name, there is only one, vschema # will just use that single keyspace. timeout = 10 while True: try: utils.vtgate.execute('select count(1) from vt_insert_test', tablet_type='master') utils.vtgate.execute('select count(1) from vt_insert_test', tablet_type='replica') break except protocols_flavor().client_error_exception_type(): logging.exception('query failed') timeout = utils.wait_step('slave tablet having correct schema', timeout) # also re-run ReloadSchema on slave, it case the first one # didn't get the replicated table. utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias]) except: tearDownModule() raise
with self.assertRaises(dbexceptions.TimeoutError): replica_conn._execute("select sleep(12) from dual", {}) try: master_conn = get_connection(db_type='master') except Exception, e: self.fail("Connection to shard0 master failed with error %s" % str(e)) with self.assertRaises(dbexceptions.TimeoutError): master_conn._execute("select sleep(12) from dual", {}) def test_restart_mysql_failure(self): try: replica_conn = get_connection(db_type='replica', shard_index=self.shard_index) except Exception, e: self.fail("Connection to shard0 replica failed with error %s" % str(e)) utils.wait_procs([self.replica_tablet.shutdown_mysql(),]) with self.assertRaises(dbexceptions.DatabaseError): replica_conn._execute("select 1 from vt_insert_test", {}) utils.wait_procs([self.replica_tablet.start_mysql(),]) self.replica_tablet.kill_vttablet() self.replica_tablet.start_vttablet() replica_conn._execute("select 1 from vt_insert_test", {}) def test_retry_txn_pool_full(self): master_conn = get_connection(db_type='master') master_conn._execute("set vt_transaction_cap=1", {}) master_conn.begin() with self.assertRaises(dbexceptions.OperationalError): master_conn2 = get_connection(db_type='master') master_conn2.begin() master_conn.commit()
def test_query_timeout(self): try: replica_conn = get_replica_connection() except Exception, e: self.fail("Connection to shard0 replica failed with error %s" % str(e)) with self.assertRaises(tablet3.TimeoutError): replica_conn._execute("select sleep(12) from dual", {}) try: master_conn = get_master_connection() except Exception, e: self.fail("Connection to shard0 master failed with error %s" % str(e)) with self.assertRaises(tablet3.TimeoutError): master_conn._execute("select sleep(12) from dual", {}) def test_mysql_failure(self): try: replica_conn = get_replica_connection() except Exception, e: self.fail("Connection to shard0 replica failed with error %s" % str(e)) utils.wait_procs([shard_0_replica.shutdown_mysql(),]) with self.assertRaises(tablet3.FatalError): replica_conn._execute("select 1 from vt_insert_test", {}) utils.wait_procs([shard_0_replica.start_mysql(),]) shard_0_replica.kill_vttablet() shard_0_replica.start_vttablet() if __name__ == '__main__': utils.main()
def test_resharding(self): # create the keyspace with just one shard shard_master.init_tablet('replica', keyspace='test_keyspace', shard='0', tablet_index=0) shard_replica.init_tablet('replica', keyspace='test_keyspace', shard='0', tablet_index=1) shard_rdonly1.init_tablet('rdonly', keyspace='test_keyspace', shard='0', tablet_index=2) for t in [shard_master, shard_replica, shard_rdonly1]: t.create_db('vt_test_keyspace') # replica is not started, InitShardMaster should timeout shard_master.start_vttablet(wait_for_state=None, binlog_use_v3_resharding_mode=False) shard_rdonly1.start_vttablet(wait_for_state=None, binlog_use_v3_resharding_mode=False) for t in [shard_master, shard_rdonly1]: t.wait_for_vttablet_state('NOT_SERVING') # reparent to make the tablets work - expect fail # because replica tablet is not up _, stderr = utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/0', shard_master.tablet_alias ], auto_log=True, expect_fail=True) self.assertIn('tablet test_nj-0000062345 ResetReplication failed', stderr) # start replica shard_replica.start_vttablet(wait_for_state=None, binlog_use_v3_resharding_mode=False) shard_replica.wait_for_vttablet_state('NOT_SERVING') # reparent to make the tablets work utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/0', shard_master.tablet_alias ], auto_log=True) utils.wait_for_tablet_type(shard_replica.tablet_alias, 'replica') utils.wait_for_tablet_type(shard_rdonly1.tablet_alias, 'rdonly') for t in [shard_master, shard_replica, shard_rdonly1]: t.wait_for_vttablet_state('SERVING') # create the tables and add startup values self._create_schema() self._insert_startup_values() # reload schema on all tablets so we can query them for t in [shard_master, shard_replica, shard_rdonly1]: utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True) # We must start vtgate after tablets are up, or else wait until 1min refresh # (that is the tablet_refresh_interval parameter for discovery gateway) # we want cache_ttl at zero so we re-read the topology for every test query. utils.VtGate().start( cache_ttl='0', tablets=[shard_master, shard_replica, shard_rdonly1]) utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1) utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1) utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1) # check the Map Reduce API works correctly, should use ExecuteShards, # as we're not sharded yet. # we have 3 values in the database, asking for 4 splits will get us # a single query. 
sql = 'select id, msg from resharding1' s = utils.vtgate.split_query(sql, 'test_keyspace', 4) self.assertEqual(len(s), 1) self.assertEqual(s[0]['shard_part']['shards'][0], '0') # change the schema, backfill keyspace_id, and change schema again self._add_sharding_key_to_schema() self._backfill_keyspace_id(shard_master) self._mark_sharding_key_not_null() # now we can be a sharded keyspace (and propagate to SrvKeyspace) utils.run_vtctl([ 'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col', base_sharding.keyspace_id_type ]) utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) # run a health check on source replica so it responds to discovery utils.run_vtctl(['RunHealthCheck', shard_replica.tablet_alias]) # create the split shards shard_0_master.init_tablet('replica', keyspace='test_keyspace', shard='-80', tablet_index=0) shard_0_replica.init_tablet('replica', keyspace='test_keyspace', shard='-80', tablet_index=1) shard_0_rdonly1.init_tablet('rdonly', keyspace='test_keyspace', shard='-80', tablet_index=2) shard_1_master.init_tablet('replica', keyspace='test_keyspace', shard='80-', tablet_index=0) shard_1_replica.init_tablet('replica', keyspace='test_keyspace', shard='80-', tablet_index=1) shard_1_rdonly1.init_tablet('rdonly', keyspace='test_keyspace', shard='80-', tablet_index=2) for t in [ shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master, shard_1_replica, shard_1_rdonly1 ]: t.create_db('vt_test_keyspace') t.start_vttablet(wait_for_state=None, binlog_use_v3_resharding_mode=False) for t in [ shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master, shard_1_replica, shard_1_rdonly1 ]: t.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/-80', shard_0_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/80-', shard_1_master.tablet_alias ], auto_log=True) for t in [shard_0_replica, shard_1_replica]: utils.wait_for_tablet_type(t.tablet_alias, 'replica') for t in [shard_0_rdonly1, shard_1_rdonly1]: utils.wait_for_tablet_type(t.tablet_alias, 'rdonly') sharded_tablets = [ shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master, shard_1_replica, shard_1_rdonly1 ] for t in sharded_tablets: t.wait_for_vttablet_state('SERVING') # must restart vtgate after tablets are up, or else wait until 1min refresh # we want cache_ttl at zero so we re-read the topology for every test query. utils.vtgate.kill() utils.vtgate = None utils.VtGate().start(cache_ttl='0', tablets=[ shard_master, shard_replica, shard_rdonly1, shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master, shard_1_replica, shard_1_rdonly1 ]) var = None # Wait for the endpoints, either local or remote. utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.-80.replica', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.80-.replica', 1, var=var) utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1, var=var) # check the Map Reduce API works correctly, should use ExecuteKeyRanges now, # as we are sharded (with just one shard). 
# again, we have 3 values in the database, asking for 4 splits will get us # a single query. sql = 'select id, msg from resharding1' s = utils.vtgate.split_query(sql, 'test_keyspace', 4) self.assertEqual(len(s), 1) self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace') # There must be one empty KeyRange which represents the full keyspace. self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1) self.assertEqual(s[0]['key_range_part']['key_ranges'][0], {}) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -\n' 'Partitions(rdonly): -\n' 'Partitions(replica): -\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # we need to create the schema, and the worker will do data copying for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'): utils.run_vtctl([ 'CopySchemaShard', '--exclude_tables', 'unrelated', shard_rdonly1.tablet_alias, keyspace_shard ], auto_log=True) utils.run_vtctl(['RunHealthCheck', shard_rdonly1.tablet_alias]) # Run vtworker as daemon for the following SplitClone commands. worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg( [ '--cell', 'test_nj', '--command_display_interval', '10ms', '--use_v3_resharding_mode=false' ], auto_log=True) # Initial clone (online). workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--offline=false', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 3, 0, 0, 0) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Modify the destination shard. SplitClone will revert the changes. # Delete row 1 (provokes an insert). shard_0_master.mquery('vt_test_keyspace', 'delete from resharding1 where id=1', write=True) # Delete row 2 (provokes an insert). shard_1_master.mquery('vt_test_keyspace', 'delete from resharding1 where id=2', write=True) # Update row 3 (provokes an update). shard_1_master.mquery( 'vt_test_keyspace', "update resharding1 set msg='msg-not-3' where id=3", write=True) # Insert row 4 (provokes a delete). self._insert_value(shard_1_master, 'resharding1', 4, 'msg4', 0xD000000000000000) workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 2, 1, 1, 0) self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1', 0, 0, 0, 3) # Terminate worker daemon because it is no longer needed. 
utils.kill_sub_process(worker_proc, soft=True) # check the startup values are in the right place self._check_startup_values() # check the schema too utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True) # check the binlog players are running logging.debug('Waiting for binlog players to start on new masters...') self.check_destination_master(shard_0_master, ['test_keyspace/0']) self.check_destination_master(shard_1_master, ['test_keyspace/0']) # check that binlog server exported the stats vars self.check_binlog_server_vars(shard_replica, horizontal=True) # testing filtered replication: insert a bunch of data on shard 1, # check we get most of it after a few seconds, wait for binlog server # timeout, check we get all of it. logging.debug('Inserting lots of data on source shard') self._insert_lots(1000) logging.debug('Checking 80 percent of data is sent quickly') v = self._check_lots_timeout(1000, 80, 5) if v != 100: logging.debug('Checking all data goes through eventually') self._check_lots_timeout(1000, 100, 20) logging.debug('Checking no data was sent the wrong way') self._check_lots_not_present(1000) self.check_binlog_player_vars(shard_0_master, ['test_keyspace/0'], seconds_behind_master_max=30) self.check_binlog_player_vars(shard_1_master, ['test_keyspace/0'], seconds_behind_master_max=30) self.check_binlog_server_vars(shard_replica, horizontal=True, min_statements=1000, min_transactions=1000) # use vtworker to compare the data for t in [shard_0_rdonly1, shard_1_rdonly1]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) if base_sharding.use_multi_split_diff: logging.debug('Running vtworker MultiSplitDiff for 0') utils.run_vtworker([ '-cell', 'test_nj', '--use_v3_resharding_mode=false', 'MultiSplitDiff', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0' ], auto_log=True) else: logging.debug('Running vtworker SplitDiff for -80') utils.run_vtworker([ '-cell', 'test_nj', '--use_v3_resharding_mode=false', 'SplitDiff', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/-80' ], auto_log=True) logging.debug('Running vtworker SplitDiff for 80-') utils.run_vtworker([ '-cell', 'test_nj', '--use_v3_resharding_mode=false', 'SplitDiff', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/80-' ], auto_log=True) utils.pause('Good time to test vtworker for diffs') # get status for the destination master tablet, make sure we have it all self.check_running_binlog_player(shard_0_master, 2000, 2000) self.check_running_binlog_player(shard_1_master, 6000, 2000) # check we can't migrate the master just yet utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'], expect_fail=True) # now serve rdonly from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # make sure rdonly tablets are back to serving before hitting vtgate. for t in [shard_0_rdonly1, shard_1_rdonly1]: t.wait_for_vttablet_state('SERVING') utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1) utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1) # check the Map Reduce API works correctly, should use ExecuteKeyRanges # on both destination shards now. 
# we ask for 2 splits to only have one per shard sql = 'select id, msg from resharding1' timeout = 10.0 while True: try: s = utils.vtgate.split_query(sql, 'test_keyspace', 2) break except Exception: # pylint: disable=broad-except timeout = utils.wait_step( 'vtgate executes split_query properly', timeout) self.assertEqual(len(s), 2) self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace') self.assertEqual(s[1]['key_range_part']['keyspace'], 'test_keyspace') self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1) self.assertEqual(len(s[1]['key_range_part']['key_ranges']), 1) # then serve replica from the split shards source_tablet = shard_replica destination_tablets = [shard_0_replica, shard_1_replica] utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # move replica back and forth utils.run_vtctl( ['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'], auto_log=True) # After a backwards migration, queryservice should be enabled on # source and disabled on destinations utils.check_tablet_query_service(self, source_tablet, True, False) utils.check_tablet_query_services(self, destination_tablets, False, True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'], auto_log=True) # After a forwards migration, queryservice should be disabled on # source and enabled on destinations utils.check_tablet_query_service(self, source_tablet, False, True) utils.check_tablet_query_services(self, destination_tablets, True, False) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # then serve master from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # check the binlog players are gone now self.check_no_binlog_player(shard_0_master) self.check_no_binlog_player(shard_1_master) # make sure we can't delete a shard with tablets utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True) # remove the original tablets in the original shard tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1]) for t in [shard_replica, shard_rdonly1]: utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True) utils.run_vtctl( ['DeleteTablet', '-allow_master', shard_master.tablet_alias], auto_log=True) # rebuild the serving graph, all mentions of the old shards should be gone utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) # delete the original shard utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True) # kill everything else tablet.kill_tablets([ shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master, shard_1_replica, shard_1_rdonly1 ])
def verify_successful_worker_copy_with_reparent(self, mysql_down=False): """Verifies that vtworker can successfully copy data for a SplitClone. Order of operations: 1. Run a background vtworker 2. Wait until the worker successfully resolves the destination masters. 3. Reparent the destination tablets 4. Wait until the vtworker copy is finished 5. Verify that the worker was forced to reresolve topology and retry writes due to the reparent. 6. Verify that the data was copied successfully to both new shards Args: mysql_down: boolean. If True, we take down the MySQL instances on the destination masters at first, then bring them back and reparent away. Raises: AssertionError if things didn't go as expected. """ if mysql_down: logging.debug('Shutting down mysqld on destination masters.') utils.wait_procs( [shard_0_master.shutdown_mysql(), shard_1_master.shutdown_mysql()]) worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg( ['--cell', 'test_nj'], auto_log=True) workerclient_proc = utils.run_vtworker_client_bg( ['SplitClone', '--source_reader_count', '1', '--destination_pack_count', '1', '--destination_writer_count', '1', 'test_keyspace/0'], worker_rpc_port) if mysql_down: # If MySQL is down, we wait until resolving at least twice (to verify that # we do reresolve and retry due to MySQL being down). worker_vars = utils.poll_for_vars( 'vtworker', worker_port, 'WorkerDestinationActualResolves >= 2', condition_fn=lambda v: v.get('WorkerDestinationActualResolves') >= 2) self.assertNotEqual( worker_vars['WorkerRetryCount'], {}, "expected vtworker to retry, but it didn't") logging.debug('Worker has resolved at least twice, starting reparent now') # Bring back masters. Since we test with semi-sync now, we need at least # one replica for the new master. This test is already quite expensive, # so we bring back the old master as a replica rather than having a third # replica up the whole time. logging.debug('Restarting mysqld on destination masters') utils.wait_procs( [shard_0_master.start_mysql(), shard_1_master.start_mysql()]) # Reparent away from the old masters. utils.run_vtctl( ['PlannedReparentShard', 'test_keyspace/-80', shard_0_replica.tablet_alias], auto_log=True) utils.run_vtctl( ['PlannedReparentShard', 'test_keyspace/80-', shard_1_replica.tablet_alias], auto_log=True) else: # NOTE: There is a race condition around this: # It's possible that the SplitClone vtworker command finishes before the # PlannedReparentShard vtctl command, which we start below, succeeds. # Then the test would fail because vtworker did not have to resolve the # master tablet again (due to the missing reparent). # # To workaround this, the test takes a parameter to increase the number of # rows that the worker has to copy (with the idea being to slow the worker # down). # You should choose a value for num_insert_rows, such that this test # passes for your environment (trial-and-error...) utils.poll_for_vars( 'vtworker', worker_port, 'WorkerDestinationActualResolves >= 1', condition_fn=lambda v: v.get('WorkerDestinationActualResolves') >= 1) logging.debug('Worker has resolved at least once, starting reparent now') utils.run_vtctl( ['PlannedReparentShard', 'test_keyspace/-80', shard_0_replica.tablet_alias], auto_log=True) utils.run_vtctl( ['PlannedReparentShard', 'test_keyspace/80-', shard_1_replica.tablet_alias], auto_log=True) utils.wait_procs([workerclient_proc]) # Verify that we were forced to reresolve and retry. 
worker_vars = utils.get_vars(worker_port) self.assertGreater(worker_vars['WorkerDestinationActualResolves'], 1) self.assertGreater(worker_vars['WorkerDestinationAttemptedResolves'], 1) self.assertNotEqual(worker_vars['WorkerRetryCount'], {}, "expected vtworker to retry, but it didn't") utils.kill_sub_process(worker_proc, soft=True) # Make sure that everything is caught up to the same replication point self.run_split_diff('test_keyspace/-80', all_shard_tablets, shard_0_tablets) self.run_split_diff('test_keyspace/80-', all_shard_tablets, shard_1_tablets) self.assert_shard_data_equal(0, shard_master, shard_0_tablets.replica) self.assert_shard_data_equal(1, shard_master, shard_1_tablets.replica)
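# utils.get_vars / utils.poll_for_vars above read the vtworker's monitoring
# port. A rough stdlib-only sketch of that pattern, assuming the port serves
# expvar-style JSON at /debug/vars (hypothetical helpers, not the real test
# library code):
import json
import time
try:
  from urllib.request import urlopen
except ImportError:
  from urllib2 import urlopen  # Python 2

def get_debug_vars(port):
  """Fetch and decode the JSON document served at /debug/vars."""
  url = 'http://localhost:%d/debug/vars' % port
  return json.loads(urlopen(url).read().decode('utf-8'))

def poll_for_var(port, name, condition, timeout=60.0, sleep_interval=0.1):
  """Poll /debug/vars until condition(vars.get(name)) is true, or time out."""
  deadline = time.time() + timeout
  while True:
    v = get_debug_vars(port)
    if condition(v.get(name)):
      return v
    if time.time() >= deadline:
      raise Exception('timed out waiting for var %s' % name)
    time.sleep(sleep_interval)

# e.g. poll_for_var(worker_port, 'WorkerDestinationActualResolves',
#                   lambda c: c is not None and c >= 2)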
def test_no_mysql_healthcheck(self): """This test starts a vttablet with no mysql port, while mysql is down. It makes sure vttablet will start properly and be unhealthy. Then we start mysql, and make sure vttablet becomes healthy. """ # we need replication to be enabled, so the slave tablet can be healthy. for t in tablet_62344, tablet_62044: t.create_db('vt_test_keyspace') pos = mysql_flavor().master_position(tablet_62344) # Use 'localhost' as hostname because Travis CI worker hostnames # are too long for MySQL replication. change_master_cmds = mysql_flavor().change_master_commands( 'localhost', tablet_62344.mysql_port, pos) tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] + change_master_cmds + ['START SLAVE']) # now shutdown all mysqld shutdown_procs = [ tablet_62344.shutdown_mysql(), tablet_62044.shutdown_mysql(), ] utils.wait_procs(shutdown_procs) # start the tablets, wait for them to be NOT_SERVING (mysqld not there) tablet_62344.init_tablet('master', 'test_keyspace', '0') tablet_62044.init_tablet('replica', 'test_keyspace', '0', include_mysql_port=False) for t in tablet_62344, tablet_62044: # Since MySQL is down at this point and we want the tablet to start up # successfully, we have to use supports_backups=False. t.start_vttablet(wait_for_state=None, supports_backups=False, full_mycnf_args=True, include_mysql_port=False) for t in tablet_62344, tablet_62044: t.wait_for_vttablet_state('NOT_SERVING') self.check_healthz(t, False) # Tell slave to not try to repair replication in healthcheck. # The StopSlave will ultimately fail because mysqld is not running, # But vttablet should remember that it's not supposed to fix replication. utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias], expect_fail=True) # The above notice to not fix replication should survive tablet restart. tablet_62044.kill_vttablet() tablet_62044.start_vttablet(wait_for_state='NOT_SERVING', full_mycnf_args=True, include_mysql_port=False, supports_backups=False) # restart mysqld start_procs = [ tablet_62344.start_mysql(), tablet_62044.start_mysql(), ] utils.wait_procs(start_procs) # the master should still be healthy utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias], auto_log=True) self.check_healthz(tablet_62344, True) # the slave will now be healthy, but report a very high replication # lag, because it can't figure out what it exactly is. 
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias], auto_log=True) tablet_62044.wait_for_vttablet_state('SERVING') self.check_healthz(tablet_62044, True) health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias]) self.assertIn('seconds_behind_master', health['realtime_stats']) self.assertEqual(health['realtime_stats']['seconds_behind_master'], 7200) self.assertIn('serving', health) # restart replication, wait until health check goes small # (a value of zero is default and won't be in structure) utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias]) timeout = 10 while True: utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias], auto_log=True) health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias]) if 'serving' in health and ( ('seconds_behind_master' not in health['realtime_stats']) or (health['realtime_stats']['seconds_behind_master'] < 30)): break timeout = utils.wait_step('health delay goes back down', timeout) # wait for the tablet to fix its mysql port for t in tablet_62344, tablet_62044: # wait for mysql port to show up timeout = 10 while True: ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias]) if 'mysql' in ti['port_map']: break timeout = utils.wait_step('mysql port in tablet record', timeout) self.assertEqual(ti['port_map']['mysql'], t.mysql_port) # all done tablet.kill_tablets([tablet_62344, tablet_62044])
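# The polling loop above treats the replica as caught up once the health
# record is 'serving' and seconds_behind_master is either absent (zero values
# are omitted from the JSON) or small. A small sketch of that predicate
# (hypothetical helper extracted from the loop condition):
def replication_caught_up(health, max_lag_seconds=30):
  """Return True if a VtTabletStreamHealth record looks healthy enough."""
  if 'serving' not in health:
    return False
  stats = health.get('realtime_stats', {})
  return ('seconds_behind_master' not in stats or
          stats['seconds_behind_master'] < max_lag_seconds)

# Matches the states seen in this test:
assert not replication_caught_up(
    {'serving': True, 'realtime_stats': {'seconds_behind_master': 7200}})
assert replication_caught_up({'serving': True, 'realtime_stats': {}})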
def test_no_mysql_healthcheck(self): """This test starts a vttablet with no mysql port, while mysql is down. It makes sure vttablet will start properly and be unhealthy. Then we start mysql, and make sure vttablet becomes healthy. """ # we need replication to be enabled, so the slave tablet can be healthy. for t in tablet_62344, tablet_62044: t.create_db('vt_test_keyspace') pos = mysql_flavor().master_position(tablet_62344) changeMasterCmds = mysql_flavor().change_master_commands( utils.hostname, tablet_62344.mysql_port, pos) tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] + changeMasterCmds + ['START SLAVE']) # now shutdown all mysqld shutdown_procs = [ tablet_62344.shutdown_mysql(), tablet_62044.shutdown_mysql(), ] utils.wait_procs(shutdown_procs) # start the tablets, wait for them to be NOT_SERVING (mysqld not there) tablet_62344.init_tablet('master', 'test_keyspace', '0') tablet_62044.init_tablet('spare', 'test_keyspace', '0', include_mysql_port=False) for t in tablet_62344, tablet_62044: t.start_vttablet(wait_for_state=None, target_tablet_type='replica', full_mycnf_args=True, include_mysql_port=False) for t in tablet_62344, tablet_62044: t.wait_for_vttablet_state('NOT_SERVING') self.check_healthz(t, False) # restart mysqld start_procs = [ tablet_62344.start_mysql(), tablet_62044.start_mysql(), ] utils.wait_procs(start_procs) # the master should still be healthy utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias, 'replica'], auto_log=True) self.check_healthz(tablet_62344, True) # the slave won't be healthy at first, as replication is not running utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'], auto_log=True) self.check_healthz(tablet_62044, False) tablet_62044.wait_for_vttablet_state('NOT_SERVING') # restart replication tablet_62044.mquery('', ['START SLAVE']) # wait for the tablet to become healthy and fix its mysql port utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'], auto_log=True) tablet_62044.wait_for_vttablet_state('SERVING') self.check_healthz(tablet_62044, True) for t in tablet_62344, tablet_62044: # wait for mysql port to show up timeout = 10 while True: ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias]) if 'mysql' in ti['Portmap']: break timeout = utils.wait_step('mysql port in tablet record', timeout) self.assertEqual(ti['Portmap']['mysql'], t.mysql_port) # all done tablet.kill_tablets([tablet_62344, tablet_62044])
def test_reparent_with_down_slave(self, shard_id='0'):
  """See if a missing slave can be safely reparented after the fact."""
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # create the database so vttablets start, as they are serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62044.create_db('vt_test_keyspace')
  tablet_41983.create_db('vt_test_keyspace')
  tablet_31981.create_db('vt_test_keyspace')

  # Start up a master mysql and vttablet
  tablet_62344.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                           wait_for_start=False)

  # Create a few slaves for testing reparenting.
  tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                           wait_for_start=False)
  tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                           wait_for_start=False)
  tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                           wait_for_start=False)

  # wait for all tablets to start
  for t in [tablet_62344, tablet_62044, tablet_31981, tablet_41983]:
    t.wait_for_vttablet_state('NOT_SERVING')

  # Force the slaves to reparent assuming that all the datasets are identical.
  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/' + shard_id,
                   tablet_62344.tablet_alias])
  utils.validate_topology(ping_tablets=True)
  tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

  utils.wait_procs([tablet_41983.shutdown_mysql()])

  # Perform a graceful reparent operation. It will fail as one tablet is down.
  _, stderr = utils.run_vtctl(['PlannedReparentShard',
                               '-keyspace_shard', 'test_keyspace/' + shard_id,
                               '-new_master', tablet_62044.tablet_alias],
                              expect_fail=True)
  self.assertIn('TabletManager.SetMaster on test_nj-0000041983 error', stderr)

  # insert data into the new master, check the connected slaves work
  self._populate_vt_insert_test(tablet_62044, 3)
  self._check_vt_insert_test(tablet_31981, 3)
  self._check_vt_insert_test(tablet_62344, 3)

  # restart mysql on the old slave, should still be connecting to the
  # old master
  utils.wait_procs([tablet_41983.start_mysql()])

  utils.pause('check orphan')

  # reparent the tablet (will not start replication, so we have to
  # do it ourselves), then it should catch up on replication really quickly
  utils.run_vtctl(['ReparentTablet', tablet_41983.tablet_alias])
  utils.run_vtctl(['StartSlave', tablet_41983.tablet_alias])

  # wait until it gets the data
  self._check_vt_insert_test(tablet_41983, 3)

  tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983,
                       tablet_31981])
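# utils.run_vtctl(..., expect_fail=True) above returns the output of a command
# that is expected to exit non-zero, so the test can assert on its stderr.
# A stdlib-only sketch of that pattern (hypothetical helper, not the real
# utils implementation):
import subprocess

def run_expect_fail(cmd):
  """Run cmd, assert that it fails, and return (stdout, stderr)."""
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = proc.communicate()
  if proc.returncode == 0:
    raise AssertionError('expected %r to fail, but it succeeded' % (cmd,))
  return stdout, stderr

# Usage mirrors the PlannedReparentShard call above:
#   _, stderr = run_expect_fail(vtctl_cmd)
#   assert 'TabletManager.SetMaster on test_nj-0000041983 error' in stderr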
def setUp(self): utils.zk_setup() utils.setup() if self.vttop is None: raise EnvironmentError("VTTOP not defined") if self.vtroot is None: raise EnvironmentError("VTROOT not defined") framework.execute('go build', verbose=utils.options.verbose, cwd=self.vttop + '/go/cmd/mysqlctl') utils.wait_procs([self.tablet.init_mysql()]) self.tablet.mquery( "", ["create database vt_test_keyspace", "set global read_only = off"]) self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace') self.clean_sqls = [] self.init_sqls = [] clean_mode = False with open( os.path.join(self.vttop, "test", "test_data", "test_schema.sql")) as f: for line in f: line = line.rstrip() if line == "# clean": clean_mode = True if line == '' or line.startswith("#"): continue if clean_mode: self.clean_sqls.append(line) else: self.init_sqls.append(line) try: for line in self.init_sqls: mcu.execute(line, {}) finally: mcu.close() utils.run_vtctl( 'CreateKeyspace -force /zk/global/vt/keyspaces/test_keyspace') self.tablet.init_tablet('master', 'test_keyspace', '0') customrules = '/tmp/customrules.json' self.create_customrules(customrules) schema_override = '/tmp/schema_override.json' self.create_schema_override(schema_override) if utils.options.memcache: self.tablet.start_vttablet(memcache=True, customrules=customrules, schema_override=schema_override) else: self.tablet.start_vttablet(customrules=customrules, schema_override=schema_override) # FIXME(szopa): This is necessary here only because of a bug that # makes the qs reload its config only after an action. utils.run_vtctl('Ping ' + self.tablet.zk_tablet_path) for i in range(30): try: self.conn = self.connect() self.txlogger = subprocess.Popen( ['curl', '-s', '-N', 'http://localhost:9461/debug/txlog'], stdout=open('/tmp/vtocc_txlog.log', 'w')) self.txlog = framework.Tailer(open('/tmp/vtocc_txlog.log'), flush=self.tablet.flush) self.log = framework.Tailer(open( os.path.join(self.tablet.tablet_dir, 'vttablet.INFO')), flush=self.tablet.flush) querylog_file = '/tmp/vtocc_streamlog_%s.log' % self.tablet.port utils.run_bg([ 'curl', '-s', '-N', 'http://localhost:9461/debug/querylog?full=true' ], stdout=open(querylog_file, 'w')) time.sleep(1) self.querylog = framework.Tailer(open(querylog_file), sleep=0.1) return except dbexceptions.OperationalError: if i == 29: raise time.sleep(1)
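# The setUp above splits test_schema.sql into init statements and cleanup
# statements at the '# clean' marker, one statement per line. A standalone
# sketch of that parsing step (hypothetical helper with the same logic):
def split_schema_file(path):
  """Return (init_sqls, clean_sqls), using '# clean' to mark the cleanup part."""
  init_sqls = []
  clean_sqls = []
  clean_mode = False
  with open(path) as f:
    for line in f:
      line = line.rstrip()
      if line == '# clean':
        clean_mode = True
      if not line or line.startswith('#'):
        continue
      if clean_mode:
        clean_sqls.append(line)
      else:
        init_sqls.append(line)
  return init_sqls, clean_sqls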
def setUp(self):
  self.master = tablet.Tablet()
  self.replica = tablet.Tablet()
  self.all_tablets = [self.master, self.replica]

  try:
    environment.topo_server().setup()

    setup_procs = [t.init_mysql() for t in self.all_tablets]
    utils.Vtctld().start()
    utils.wait_procs(setup_procs)

    utils.run_vtctl(['CreateKeyspace', KEYSPACE])

    # Start tablets.
    db_name = 'vt_' + KEYSPACE
    for t in self.all_tablets:
      t.create_db(db_name)
    self.master.start_vttablet(wait_for_state=None,
                               init_tablet_type='replica',
                               init_keyspace=KEYSPACE, init_shard=SHARD,
                               tablet_index=0)
    self.replica.start_vttablet(wait_for_state=None,
                                init_tablet_type='replica',
                                init_keyspace=KEYSPACE, init_shard=SHARD,
                                tablet_index=1)
    for t in self.all_tablets:
      t.wait_for_vttablet_state('NOT_SERVING')

    # Reparent to choose an initial master and enable replication.
    utils.run_vtctl(['InitShardMaster', '-force',
                     '%s/%s' % (KEYSPACE, SHARD), self.master.tablet_alias])

    # Create the schema.
    utils.run_vtctl(['ApplySchema', '-sql=' + SCHEMA, KEYSPACE])

    # Start vtgate.
    utils.VtGate().start(
        extra_args=[
            '-enable_vtgate_buffer',
            # Long timeout in case failover is slow.
            '-vtgate_buffer_window', '10m',
            '-vtgate_buffer_max_failover_duration', '10m',
            '-vtgate_buffer_min_time_between_failovers', '20m'],
        tablets=self.all_tablets)

    # Insert two rows for the later threads (critical read, update).
    with utils.vtgate.write_transaction(keyspace=KEYSPACE, shards=[SHARD],
                                        tablet_type='master') as tx:
      tx.execute('INSERT INTO buffer (id, msg) VALUES (:id, :msg)',
                 {'id': CRITICAL_READ_ROW_ID, 'msg': 'critical read'})
      tx.execute('INSERT INTO buffer (id, msg) VALUES (:id, :msg)',
                 {'id': UPDATE_ROW_ID, 'msg': 'update'})
  except:
    self.tearDown()
    raise
def setUpModule():
  try:
    environment.topo_server().setup()

    setup_procs = [
        src_master.init_mysql(),
        src_replica.init_mysql(),
        src_rdonly1.init_mysql(),
        src_rdonly2.init_mysql(),
        dst_master.init_mysql(),
        dst_replica.init_mysql(),
        ]
    utils.Vtctld().start()
    utils.wait_procs(setup_procs)

    # Set up binlog stream from shard 0 to shard 1.
    # Modeled after initial_sharding.py.
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                     'keyspace_id', keyrange_constants.KIT_UINT64])

    src_master.init_tablet('master', 'test_keyspace', '0')
    src_replica.init_tablet('replica', 'test_keyspace', '0')
    src_rdonly1.init_tablet('rdonly', 'test_keyspace', '0')
    src_rdonly2.init_tablet('rdonly', 'test_keyspace', '0')

    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
    utils.validate_topology()
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    for t in [src_master, src_replica, src_rdonly1, src_rdonly2]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)

    for t in [src_master, src_replica, src_rdonly1, src_rdonly2]:
      t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/0',
                     src_master.tablet_alias], auto_log=True)

    # Create schema
    logging.debug("Creating schema...")
    create_table = '''create table test_table(
        id bigint auto_increment,
        keyspace_id bigint(20) unsigned,
        msg varchar(64),
        primary key (id),
        index by_msg (msg)
        ) Engine=InnoDB'''
    utils.run_vtctl(['ApplySchema', '-sql=' + create_table, 'test_keyspace'],
                    auto_log=True)

    # Create destination shard.
    dst_master.init_tablet('master', 'test_keyspace', '-')
    dst_replica.init_tablet('replica', 'test_keyspace', '-')
    dst_master.start_vttablet(wait_for_state='NOT_SERVING')
    dst_replica.start_vttablet(wait_for_state='NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/-',
                     dst_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # copy the schema
    utils.run_vtctl(['CopySchemaShard', src_replica.tablet_alias,
                     'test_keyspace/-'], auto_log=True)

    # run the clone worker (this is a degenerate case, source and destination
    # both have the full keyrange. Happens to work correctly).
    logging.debug("Running the clone worker to start binlog stream...")
    utils.run_vtworker(['--cell', 'test_nj',
                        'SplitClone',
                        '--strategy=-populate_blp_checkpoint',
                        '--source_reader_count', '10',
                        '--min_table_size_for_split', '1',
                        'test_keyspace/0'],
                       auto_log=True)
    dst_master.wait_for_binlog_player_count(1)

    # Wait for dst_replica to be ready.
    dst_replica.wait_for_binlog_server_state("Enabled")
  except:
    tearDownModule()
    raise
def setUpModule(): global new_init_db, db_credentials_file global tablet_master, tablet_replica1, tablet_replica2 tablet_master = tablet.Tablet(use_mysqlctld=use_mysqlctld, vt_dba_passwd='VtDbaPass') tablet_replica1 = tablet.Tablet(use_mysqlctld=use_mysqlctld, vt_dba_passwd='VtDbaPass') tablet_replica2 = tablet.Tablet(use_mysqlctld=use_mysqlctld, vt_dba_passwd='VtDbaPass') try: environment.topo_server().setup() credentials = { 'vt_dba': ['VtDbaPass'], 'vt_app': ['VtAppPass'], 'vt_allprivs': ['VtAllprivsPass'], 'vt_repl': ['VtReplPass'], 'vt_filtered': ['VtFilteredPass'], } db_credentials_file = environment.tmproot + '/db_credentials.json' with open(db_credentials_file, 'w') as fd: fd.write(json.dumps(credentials)) # Determine which column is used for user passwords in this MySQL version. proc = tablet_master.init_mysql() if use_mysqlctld: tablet_master.wait_for_mysqlctl_socket() else: utils.wait_procs([proc]) try: tablet_master.mquery('mysql', 'select password from mysql.user limit 0', user='******') password_col = 'password' except MySQLdb.DatabaseError: password_col = 'authentication_string' utils.wait_procs([tablet_master.teardown_mysql()]) tablet_master.remove_tree(ignore_options=True) # Create a new init_db.sql file that sets up passwords for all users. # Then we use a db-credentials-file with the passwords. new_init_db = environment.tmproot + '/init_db_with_passwords.sql' with open(environment.vttop + '/config/init_db.sql') as fd: init_db = fd.read() with open(new_init_db, 'w') as fd: fd.write(init_db) fd.write(mysql_flavor().change_passwords(password_col)) # start mysql instance external to the test setup_procs = [ tablet_master.init_mysql( init_db=new_init_db, extra_args=['-db-credentials-file', db_credentials_file]), tablet_replica1.init_mysql( init_db=new_init_db, extra_args=['-db-credentials-file', db_credentials_file]), tablet_replica2.init_mysql( init_db=new_init_db, extra_args=['-db-credentials-file', db_credentials_file]), ] if use_mysqlctld: tablet_master.wait_for_mysqlctl_socket() tablet_replica1.wait_for_mysqlctl_socket() tablet_replica2.wait_for_mysqlctl_socket() else: utils.wait_procs(setup_procs) except: tearDownModule() raise
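# The setUpModule above probes which column holds user passwords: MySQL 5.6
# and earlier use mysql.user.password, while 5.7+ renamed the column to
# authentication_string, so the probe falls back when the query errors out.
# A sketch of the same decision with the connection abstracted behind a
# query callable (hypothetical helper):
def detect_password_column(query):
  """query: callable running a SQL statement and raising on error."""
  try:
    query('select password from mysql.user limit 0')
    return 'password'
  except Exception:  # the test catches MySQLdb.DatabaseError specifically
    return 'authentication_string'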
def setUpModule(): global new_init_db, db_credentials_file try: credentials = { 'vt_dba': ['VtDbaPass'], 'vt_app': ['VtAppPass'], 'vt_allprivs': ['VtAllprivsPass'], 'vt_repl': ['VtReplPass'], 'vt_filtered': ['VtFilteredPass'], } db_credentials_file = environment.tmproot + '/db_credentials.json' with open(db_credentials_file, 'w') as fd: fd.write(json.dumps(credentials)) # Determine which column is used for user passwords in this MySQL version. proc = ks1_shard_master.init_mysql() utils.wait_procs([proc]) try: ks1_shard_master.mquery('mysql', 'select password from mysql.user limit 0', user='******') password_col = 'password' except MySQLdb.DatabaseError: password_col = 'authentication_string' utils.wait_procs([ks1_shard_master.teardown_mysql()]) ks1_shard_master.remove_tree(ignore_options=True) # Create a new init_db.sql file that sets up passwords for all users. # Then we use a db-credentials-file with the passwords. new_init_db = environment.tmproot + '/init_db_with_passwords.sql' with open(environment.vttop + '/config/init_db.sql') as fd: init_db = fd.read() with open(new_init_db, 'w') as fd: fd.write(init_db) fd.write(mysql_flavor().change_passwords(password_col)) fd.write(''' # connecting through a port requires 127.0.0.1 # --host=localhost will connect through socket CREATE USER 'vt_dba'@'127.0.0.1' IDENTIFIED BY 'VtDbaPass'; GRANT ALL ON *.* TO 'vt_dba'@'127.0.0.1'; GRANT GRANT OPTION ON *.* TO 'vt_dba'@'127.0.0.1'; # User for app traffic, with global read-write access. CREATE USER 'vt_app'@'127.0.0.1' IDENTIFIED BY 'VtAppPass'; GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE, REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES, LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW, SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER ON *.* TO 'vt_app'@'127.0.0.1'; # User for administrative operations that need to be executed as non-SUPER. # Same permissions as vt_app here. CREATE USER 'vt_allprivs'@'127.0.0.1' IDENTIFIED BY 'VtAllPrivsPass'; GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE, REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES, LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW, SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER ON *.* TO 'vt_allprivs'@'127.0.0.1'; # User for Vitess filtered replication (binlog player). # Same permissions as vt_app. CREATE USER 'vt_filtered'@'127.0.0.1' IDENTIFIED BY 'VtFilteredPass'; GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE, REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES, LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW, SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER ON *.* TO 'vt_filtered'@'127.0.0.1'; FLUSH PRIVILEGES; ''') setup_procs = [ t.init_mysql( use_rbr=True, init_db=new_init_db, extra_args=['-db-credentials-file', db_credentials_file]) for t in all_mysql_tablets ] utils.wait_procs(setup_procs) for i in range(0, len(all_other_tablets)): all_other_tablets[i].mysql_port = all_mysql_tablets[i].mysql_port environment.topo_server().setup() except: tearDownModule() raise
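# The hand-written init_db block above repeats the same CREATE USER / GRANT
# boilerplate per account for 127.0.0.1 logins. A sketch of generating those
# statements from the credentials dict instead (hypothetical helper, not a
# drop-in replacement: the block above skips vt_repl, whose replication grant
# is presumably handled by the base init_db.sql, and vt_dba is the one account
# that gets ALL plus GRANT OPTION):
NON_DBA_PRIVS = ('SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS,'
                 ' FILE, REFERENCES, INDEX, ALTER, SHOW DATABASES,'
                 ' CREATE TEMPORARY TABLES, LOCK TABLES, EXECUTE,'
                 ' REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW, SHOW VIEW,'
                 ' CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER')

def tcp_user_statements(credentials):
  """Yield CREATE USER / GRANT statements for each account in credentials."""
  for user, passwords in sorted(credentials.items()):
    yield "CREATE USER '%s'@'127.0.0.1' IDENTIFIED BY '%s';" % (user, passwords[0])
    if user == 'vt_dba':
      yield "GRANT ALL ON *.* TO 'vt_dba'@'127.0.0.1';"
      yield "GRANT GRANT OPTION ON *.* TO 'vt_dba'@'127.0.0.1';"
    else:
      yield "GRANT %s ON *.* TO '%s'@'127.0.0.1';" % (NON_DBA_PRIVS, user)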
def verify_successful_worker_copy_with_reparent(self, mysql_down=False): """Verifies that vtworker can successfully copy data for a SplitClone. Order of operations: 1. Run a background vtworker 2. Wait until the worker successfully resolves the destination masters. 3. Reparent the destination tablets 4. Wait until the vtworker copy is finished 5. Verify that the worker was forced to reresolve topology and retry writes due to the reparent. 6. Verify that the data was copied successfully to both new shards Args: mysql_down: boolean. If True, we take down the MySQL instances on the destination masters at first, then bring them back and reparent away. Raises: AssertionError if things didn't go as expected. """ if mysql_down: logging.debug('Shutting down mysqld on destination masters.') utils.wait_procs([ shard_0_master.shutdown_mysql(), shard_1_master.shutdown_mysql() ]) worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg( ['--cell', 'test_nj'], auto_log=True) # --max_tps is only specified to enable the throttler and ensure that the # code is executed. But the intent here is not to throttle the test, hence # the rate limit is set very high. # --chunk_count is 2 because rows are currently ordered by primary key such # that all rows of the first shard come first and then the second shard. # TODO(mberlin): Remove --offline=false once vtworker ensures that the # destination shards are not behind the master's replication # position. args = [ 'SplitClone', '--offline=false', '--destination_writer_count', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999' ] if not mysql_down: # Make the clone as slow as necessary such that there is enough time to # run PlannedReparent in the meantime. # TOOD(mberlin): Once insert_values is fixed to uniformly distribute the # rows across shards when sorted by primary key, remove # --chunk_count 2, --min_rows_per_chunk 1 and set # --source_reader_count back to 1. args.extend([ '--source_reader_count', '2', '--chunk_count', '2', '--min_rows_per_chunk', '1', '--write_query_max_rows', '1' ]) args.append('test_keyspace/0') workerclient_proc = utils.run_vtworker_client_bg(args, worker_rpc_port) if mysql_down: # If MySQL is down, we wait until vtworker retried at least once to make # sure it reached the point where a write failed due to MySQL being down. # There should be two retries at least, one for each destination shard. utils.poll_for_vars( 'vtworker', worker_port, 'WorkerRetryCount >= 2', condition_fn=lambda v: v.get('WorkerRetryCount') >= 2) logging.debug( 'Worker has retried at least twice, starting reparent now') # vtworker is blocked at this point. This is a good time to test that its # throttler server is reacting to RPCs. self.check_throttler_service( 'localhost:%d' % worker_rpc_port, ['test_keyspace/-80', 'test_keyspace/80-'], 9999) # Bring back masters. Since we test with semi-sync now, we need at least # one replica for the new master. This test is already quite expensive, # so we bring back the old master as a replica rather than having a third # replica up the whole time. logging.debug('Restarting mysqld on destination masters') utils.wait_procs( [shard_0_master.start_mysql(), shard_1_master.start_mysql()]) # Reparent away from the old masters. 
utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/-80', shard_0_replica.tablet_alias ], auto_log=True) utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/80-', shard_1_replica.tablet_alias ], auto_log=True) else: # NOTE: There is a race condition around this: # It's possible that the SplitClone vtworker command finishes before the # PlannedReparentShard vtctl command, which we start below, succeeds. # Then the test would fail because vtworker did not have to retry. # # To workaround this, the test takes a parameter to increase the number of # rows that the worker has to copy (with the idea being to slow the worker # down). # You should choose a value for num_insert_rows, such that this test # passes for your environment (trial-and-error...) # Make sure that vtworker got past the point where it picked a master # for each destination shard ("finding targets" state). utils.poll_for_vars( 'vtworker', worker_port, 'WorkerState == cloning the data (online)', condition_fn=lambda v: v.get('WorkerState') == 'cloning the' ' data (online)') logging.debug('Worker is in copy state, starting reparent now') utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/-80', shard_0_replica.tablet_alias ], auto_log=True) utils.run_vtctl([ 'PlannedReparentShard', 'test_keyspace/80-', shard_1_replica.tablet_alias ], auto_log=True) utils.wait_procs([workerclient_proc]) # Verify that we were forced to re-resolve and retry. worker_vars = utils.get_vars(worker_port) self.assertGreater( worker_vars['WorkerRetryCount'], 1, "expected vtworker to retry each of the two reparented" " destination masters at least once, but it didn't") self.assertNotEqual(worker_vars['WorkerRetryCount'], {}, "expected vtworker to retry, but it didn't") utils.kill_sub_process(worker_proc, soft=True) # Wait for the destination RDONLYs to catch up or the following offline # clone will try to insert rows which already exist. # TODO(mberlin): Remove this once SplitClone supports it natively. utils.wait_for_replication_pos(shard_0_replica, shard_0_rdonly1) utils.wait_for_replication_pos(shard_1_replica, shard_1_rdonly1) # Run final offline clone to enable filtered replication. _, _ = utils.run_vtworker([ '-cell', 'test_nj', 'SplitClone', '--online=false', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0' ], auto_log=True) # Make sure that everything is caught up to the same replication point self.run_split_diff('test_keyspace/-80', all_shard_tablets, shard_0_tablets) self.run_split_diff('test_keyspace/80-', all_shard_tablets, shard_1_tablets) self.assert_shard_data_equal(0, shard_master, shard_0_tablets.replica) self.assert_shard_data_equal(1, shard_master, shard_1_tablets.replica)
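# run_split_diff / assert_shard_data_equal above verify that each destination
# shard ends up with exactly the source rows that map to it. A naive
# standalone sketch of that kind of check over plain row tuples (hypothetical
# helper; the real comparison goes through vtworker SplitDiff):
def assert_rows_split_correctly(source_rows, dest_rows_by_shard, shard_for_row):
  """source_rows: row tuples; dest_rows_by_shard: dict shard -> row tuples;
  shard_for_row: maps a row to the shard it should land on."""
  expected = {}
  for row in source_rows:
    expected.setdefault(shard_for_row(row), []).append(row)
  for shard, rows in dest_rows_by_shard.items():
    if sorted(rows) != sorted(expected.get(shard, [])):
      raise AssertionError('row mismatch on shard %s' % shard)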
def test_merge_sharding(self): utils.run_vtctl(['CreateKeyspace', '--sharding_column_name', 'custom_ksid_col', '--sharding_column_type', base_sharding.keyspace_id_type, 'test_keyspace']) shard_0_master.init_tablet('replica', 'test_keyspace', '-40') shard_0_replica.init_tablet('replica', 'test_keyspace', '-40') shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40') shard_1_master.init_tablet('replica', 'test_keyspace', '40-80') shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80') shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80') shard_2_master.init_tablet('replica', 'test_keyspace', '80-') shard_2_replica.init_tablet('replica', 'test_keyspace', '80-') shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-') # rebuild and check SrvKeyspace utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace']) self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col') # create databases so vttablet can start behaving normally for t in [shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master, shard_1_replica, shard_1_rdonly, shard_2_master, shard_2_replica, shard_2_rdonly]: t.create_db('vt_test_keyspace') t.start_vttablet(wait_for_state=None, binlog_use_v3_resharding_mode=False) # won't be serving, no replication state for t in [shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master, shard_1_replica, shard_1_rdonly, shard_2_master, shard_2_replica, shard_2_rdonly]: t.wait_for_vttablet_state('NOT_SERVING') # reparent to make the tablets work utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-40', shard_0_master.tablet_alias], auto_log=True) utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/40-80', shard_1_master.tablet_alias], auto_log=True) utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-', shard_2_master.tablet_alias], auto_log=True) # create the tables self._create_schema() self._insert_startup_values() # run a health check on source replicas so they respond to discovery # (for binlog players) and on the source rdonlys (for workers) for t in [shard_0_replica, shard_1_replica]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) for t in [shard_0_rdonly, shard_1_rdonly]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) # create the merge shards shard_dest_master.init_tablet('replica', 'test_keyspace', '-80') shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80') shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80') # start vttablet on the destination shard (no db created, # so they're all not serving) for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]: t.start_vttablet(wait_for_state=None, binlog_use_v3_resharding_mode=False) for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]: t.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80', shard_dest_master.tablet_alias], auto_log=True) utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -40 40-80 80-\n' 'Partitions(rdonly): -40 40-80 80-\n' 'Partitions(replica): -40 40-80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # copy the schema utils.run_vtctl(['CopySchemaShard', shard_0_rdonly.tablet_alias, 'test_keyspace/-80'], auto_log=True) # copy the data (will also start filtered replication), reset source # Run vtworker as daemon 
for the following SplitClone commands. worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg( ['--cell', 'test_nj', '--command_display_interval', '10ms', '--use_v3_resharding_mode=false'], auto_log=True) # Initial clone (online). workerclient_proc = utils.run_vtworker_client_bg( ['SplitClone', '--offline=false', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/-80'], worker_rpc_port) utils.wait_procs([workerclient_proc]) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 2, 0, 0, 0) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Modify the destination shard. SplitClone will revert the changes. # Delete row 1 (provokes an insert). shard_dest_master.mquery('vt_test_keyspace', 'delete from resharding1 where id=1', write=True) # Update row 2 (provokes an update). shard_dest_master.mquery( 'vt_test_keyspace', "update resharding1 set msg='msg-not-2' where id=2", write=True) # Insert row 0 (provokes a delete). self._insert_value(shard_dest_master, 'resharding1', 0, 'msg0', 0x5000000000000000) workerclient_proc = utils.run_vtworker_client_bg( ['SplitClone', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/-80'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Change tablets, which were taken offline, back to rdonly. utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'], auto_log=True) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 1, 1, 1, 0) self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1', 0, 0, 0, 2) # Terminate worker daemon because it is no longer needed. utils.kill_sub_process(worker_proc, soft=True) # check the startup values are in the right place self._check_startup_values() # check the schema too utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True) # check binlog player variables self.check_destination_master(shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80']) # check that binlog server exported the stats vars self.check_binlog_server_vars(shard_0_replica, horizontal=True) self.check_binlog_server_vars(shard_1_replica, horizontal=True) # testing filtered replication: insert a bunch of data on shard 0 and 1, # check we get most of it after a few seconds, wait for binlog server # timeout, check we get all of it. logging.debug('Inserting lots of data on source shards') self._insert_lots(1000) logging.debug('Checking 80 percent of data is sent quickly') v = self._check_lots_timeout(1000, 80, 10) if v != 100: # small optimization: only do this check if we don't have all the data # already anyway. 
logging.debug('Checking all data goes through eventually') self._check_lots_timeout(1000, 100, 30) self.check_binlog_player_vars(shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80'], seconds_behind_master_max=30) self.check_binlog_server_vars(shard_0_replica, horizontal=True, min_statements=1000, min_transactions=1000) self.check_binlog_server_vars(shard_1_replica, horizontal=True, min_statements=1000, min_transactions=1000) # use vtworker to compare the data (after health-checking the destination # rdonly tablets so discovery works) utils.run_vtctl(['RunHealthCheck', shard_dest_rdonly.tablet_alias]) logging.debug('Running vtworker SplitDiff on first half') utils.run_vtworker(['-cell', 'test_nj', '--use_v3_resharding_mode=false', 'SplitDiff', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', '--source_uid', '1', 'test_keyspace/-80'], auto_log=True) utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'], auto_log=True) logging.debug('Running vtworker SplitDiff on second half') utils.run_vtworker(['-cell', 'test_nj', '--use_v3_resharding_mode=false', 'SplitDiff', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', '--source_uid', '2', 'test_keyspace/-80'], auto_log=True) utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'], auto_log=True) # get status for the destination master tablet, make sure we have it all self.check_running_binlog_player(shard_dest_master, 3000, 1000) # check destination master query service is not running utils.check_tablet_query_service(self, shard_dest_master, False, False) stream_health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1', shard_dest_master.tablet_alias]) logging.debug('Got health: %s', str(stream_health)) self.assertIn('realtime_stats', stream_health) self.assertNotIn('serving', stream_health) # check the destination master 3 is healthy, even though its query # service is not running (if not healthy this would exception out) shard_dest_master.get_healthz() # now serve rdonly from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -40 40-80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -40 40-80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # now serve replica from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -40 40-80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # now serve master from the split shards utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_0_master, False, True) utils.check_tablet_query_service(self, shard_1_master, False, True) # check the binlog players are gone now 
self.check_no_binlog_player(shard_dest_master) # kill the original tablets in the original shards tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master, shard_1_replica, shard_1_rdonly]) for t in [shard_0_replica, shard_0_rdonly, shard_1_replica, shard_1_rdonly]: utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True) for t in [shard_0_master, shard_1_master]: utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias], auto_log=True) # delete the original shards utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True) utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True) # rebuild the serving graph, all mentions of the old shards should be gone utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) # kill everything else tablet.kill_tablets([shard_2_master, shard_2_replica, shard_2_rdonly, shard_dest_master, shard_dest_replica, shard_dest_rdonly])
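# check_srv_keyspace above compares against literal 'Partitions(...)' strings.
# A small sketch of composing the expected string from per-type shard lists,
# in the exact format asserted throughout these tests (hypothetical helper):
def expected_partitions(master, rdonly, replica):
  """Build the Partitions block asserted by check_srv_keyspace."""
  return ('Partitions(master): %s\n'
          'Partitions(rdonly): %s\n'
          'Partitions(replica): %s\n' %
          (' '.join(master), ' '.join(rdonly), ' '.join(replica)))

# expected_partitions(['-80', '80-'], ['-80', '80-'], ['-80', '80-']) matches
# the final check_srv_keyspace call in test_merge_sharding above.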
def setup(): utils.zk_setup() utils.debug("Creating certificates") os.makedirs(cert_dir) # Create CA certificate ca_key = cert_dir + "/ca-key.pem" ca_cert = cert_dir + "/ca-cert.pem" openssl(["genrsa", "-out", cert_dir + "/ca-key.pem"]) ca_config = cert_dir + "/ca.config" with open(ca_config, 'w') as fd: fd.write(""" [ req ] default_bits = 1024 default_keyfile = keyfile.pem distinguished_name = req_distinguished_name attributes = req_attributes prompt = no output_password = mypass [ req_distinguished_name ] C = US ST = California L = Mountain View O = Google OU = Vitess CN = Mysql CA emailAddress = [email protected] [ req_attributes ] challengePassword = A challenge password """) openssl([ "req", "-new", "-x509", "-nodes", "-days", "3600", "-batch", "-config", ca_config, "-key", ca_key, "-out", ca_cert ]) # Create mysql server certificate, remove passphrase, and sign it server_key = cert_dir + "/server-key.pem" server_cert = cert_dir + "/server-cert.pem" server_req = cert_dir + "/server-req.pem" server_config = cert_dir + "/server.config" with open(server_config, 'w') as fd: fd.write(""" [ req ] default_bits = 1024 default_keyfile = keyfile.pem distinguished_name = req_distinguished_name attributes = req_attributes prompt = no output_password = mypass [ req_distinguished_name ] C = US ST = California L = Mountain View O = Google OU = Vitess CN = Mysql Server emailAddress = [email protected] [ req_attributes ] challengePassword = A challenge password """) openssl([ "req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch", "-config", server_config, "-keyout", server_key, "-out", server_req ]) openssl(["rsa", "-in", server_key, "-out", server_key]) openssl([ "x509", "-req", "-in", server_req, "-days", "3600", "-CA", ca_cert, "-CAkey", ca_key, "-set_serial", "01", "-out", server_cert ]) # Create mysql client certificate, remove passphrase, and sign it client_key = cert_dir + "/client-key.pem" client_cert = cert_dir + "/client-cert.pem" client_req = cert_dir + "/client-req.pem" client_config = cert_dir + "/client.config" with open(client_config, 'w') as fd: fd.write(""" [ req ] default_bits = 1024 default_keyfile = keyfile.pem distinguished_name = req_distinguished_name attributes = req_attributes prompt = no output_password = mypass [ req_distinguished_name ] C = US ST = California L = Mountain View O = Google OU = Vitess CN = Mysql Client emailAddress = [email protected] [ req_attributes ] challengePassword = A challenge password """) openssl([ "req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch", "-config", client_config, "-keyout", client_key, "-out", client_req ]) openssl(["rsa", "-in", client_key, "-out", client_key]) openssl([ "x509", "-req", "-in", client_req, "-days", "3600", "-CA", ca_cert, "-CAkey", ca_key, "-set_serial", "02", "-out", client_cert ]) # Create vt server certificate, remove passphrase, and sign it vt_server_key = cert_dir + "/vt-server-key.pem" vt_server_cert = cert_dir + "/vt-server-cert.pem" vt_server_req = cert_dir + "/vt-server-req.pem" openssl([ "req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch", "-keyout", vt_server_key, "-out", vt_server_req ]) openssl(["rsa", "-in", vt_server_key, "-out", vt_server_key]) openssl([ "x509", "-req", "-in", vt_server_req, "-days", "3600", "-CA", ca_cert, "-CAkey", ca_key, "-set_serial", "03", "-out", vt_server_cert ]) extra_my_cnf = cert_dir + "/secure.cnf" fd = open(extra_my_cnf, "w") fd.write("ssl-ca=" + ca_cert + "\n") fd.write("ssl-cert=" + server_cert + "\n") fd.write("ssl-key=" + 
server_key + "\n") fd.close() setup_procs = [ shard_0_master.init_mysql(extra_my_cnf=extra_my_cnf), shard_0_slave.init_mysql(extra_my_cnf=extra_my_cnf), ] utils.wait_procs(setup_procs) utils.run_vtctl('CreateKeyspace test_keyspace') shard_0_master.init_tablet('master', 'test_keyspace', '0') shard_0_slave.init_tablet('replica', 'test_keyspace', '0') utils.run_vtctl('RebuildShardGraph test_keyspace/0', auto_log=True) utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True) # create databases so vttablet can start behaving normally shard_0_master.create_db('vt_test_keyspace') shard_0_slave.create_db('vt_test_keyspace')
def test_resharding(self): # we're going to reparent and swap these two global shard_2_master, shard_2_replica1 utils.run_vtctl([ 'CreateKeyspace', '--sharding_column_name', 'bad_column', '--sharding_column_type', 'bytes', 'test_keyspace' ]) utils.run_vtctl([ 'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col', 'uint64' ], expect_fail=True) utils.run_vtctl([ 'SetKeyspaceShardingInfo', '-force', 'test_keyspace', 'custom_ksid_col', base_sharding.keyspace_id_type ]) shard_0_master.init_tablet('master', 'test_keyspace', '-80') shard_0_replica.init_tablet('replica', 'test_keyspace', '-80') shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80') shard_1_master.init_tablet('master', 'test_keyspace', '80-') shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-') shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-') shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-') shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-') utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) ks = utils.run_vtctl_json( ['GetSrvKeyspace', 'test_nj', 'test_keyspace']) self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col') # we set full_mycnf_args to True as a test in the KIT_BYTES case full_mycnf_args = ( base_sharding.keyspace_id_type == keyrange_constants.KIT_BYTES) # create databases so vttablet can start behaving somewhat normally for t in [ shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]: t.create_db('vt_test_keyspace') t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args) # wait for the tablets (replication is not setup, the slaves won't be # healthy) shard_0_master.wait_for_vttablet_state('SERVING') shard_0_replica.wait_for_vttablet_state('NOT_SERVING') shard_0_ny_rdonly.wait_for_vttablet_state('NOT_SERVING') shard_1_master.wait_for_vttablet_state('SERVING') shard_1_slave1.wait_for_vttablet_state('NOT_SERVING') shard_1_slave2.wait_for_vttablet_state('NOT_SERVING') shard_1_ny_rdonly.wait_for_vttablet_state('NOT_SERVING') shard_1_rdonly1.wait_for_vttablet_state('NOT_SERVING') # reparent to make the tablets work utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias ], auto_log=True) # check the shards shards = utils.run_vtctl_json( ['FindAllShardsInKeyspace', 'test_keyspace']) self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards)) self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards)) self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards)) # create the tables self._create_schema() self._insert_startup_values() self._test_keyrange_constraints() # run a health check on source replicas so they respond to discovery # (for binlog players) and on the source rdonlys (for workers) for t in [shard_0_replica, shard_1_slave1]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) # create the split shards shard_2_master.init_tablet('master', 'test_keyspace', '80-c0') shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0') shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0') shard_2_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-c0') shard_3_master.init_tablet('master', 'test_keyspace', 'c0-') 
shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-') shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-') # start vttablet on the split shards (no db created, # so they're all not serving) shard_2_master.start_vttablet(wait_for_state=None) shard_3_master.start_vttablet(wait_for_state=None) for t in [ shard_2_replica1, shard_2_replica2, shard_2_rdonly1, shard_3_replica, shard_3_rdonly1 ]: t.start_vttablet(wait_for_state=None) for t in [ shard_2_master, shard_2_replica1, shard_2_replica2, shard_2_rdonly1, shard_3_master, shard_3_replica, shard_3_rdonly1 ]: t.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/80-c0', shard_2_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'InitShardMaster', 'test_keyspace/c0-', shard_3_master.tablet_alias ], auto_log=True) # check the shards shards = utils.run_vtctl_json( ['FindAllShardsInKeyspace', 'test_keyspace']) for s in ['-80', '80-', '80-c0', 'c0-']: self.assertIn(s, shards, 'unexpected shards: %s' % str(shards)) self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards)) utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # disable shard_1_slave2, so we're sure filtered replication will go # from shard_1_slave1 utils.run_vtctl( ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare']) shard_1_slave2.wait_for_vttablet_state('NOT_SERVING') # we need to create the schema, and the worker will do data copying for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'): utils.run_vtctl([ 'CopySchemaShard', '--exclude_tables', 'unrelated', shard_1_rdonly1.tablet_alias, keyspace_shard ], auto_log=True) # Run vtworker as daemon for the following SplitClone commands. worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg( ['--cell', 'test_nj', '--command_display_interval', '10ms'], auto_log=True) # Copy the data from the source to the destination shards. # --max_tps is only specified to enable the throttler and ensure that the # code is executed. But the intent here is not to throttle the test, hence # the rate limit is set very high. # # Initial clone (online). workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--offline=false', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 2, 0, 0, 0) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Test the correct handling of keyspace_id changes which happen after # the first clone. # Let row 2 go to shard 3 instead of shard 2. 
shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set' ' custom_ksid_col=0xD000000000000000 WHERE id=2', write=True) workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--offline=false', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Row 2 will be deleted from shard 2 and inserted to shard 3. self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 1, 0, 1, 1) self._check_value(shard_2_master, 'resharding1', 2, 'msg2', 0xD000000000000000, should_be_here=False) self._check_value(shard_3_master, 'resharding1', 2, 'msg2', 0xD000000000000000) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Move row 2 back to shard 2 from shard 3 by changing the keyspace_id again. shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set' ' custom_ksid_col=0x9000000000000000 WHERE id=2', write=True) workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--offline=false', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Row 2 will be deleted from shard 3 and inserted to shard 2. self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 1, 0, 1, 1) self._check_value(shard_2_master, 'resharding1', 2, 'msg2', 0x9000000000000000) self._check_value(shard_3_master, 'resharding1', 2, 'msg2', 0x9000000000000000, should_be_here=False) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Modify the destination shard. SplitClone will revert the changes. # Delete row 2 (provokes an insert). shard_2_master.mquery('vt_test_keyspace', 'delete from resharding1 where id=2', write=True) # Update row 3 (provokes an update). shard_3_master.mquery( 'vt_test_keyspace', "update resharding1 set msg='msg-not-3' where id=3", write=True) # Insert row 4 and 5 (provokes a delete). self._insert_value(shard_3_master, 'resharding1', 4, 'msg4', 0xD000000000000000) self._insert_value(shard_3_master, 'resharding1', 5, 'msg5', 0xD000000000000000) workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Change tablet, which was taken offline, back to rdonly. utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], auto_log=True) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 1, 1, 2, 0) self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1', 0, 0, 0, 2) # Terminate worker daemon because it is no longer needed. 
utils.kill_sub_process(worker_proc, soft=True) # TODO(alainjobart): experiment with the dontStartBinlogPlayer option # check the startup values are in the right place self._check_startup_values() # check the schema too utils.run_vtctl([ 'ValidateSchemaKeyspace', '--exclude_tables=unrelated', 'test_keyspace' ], auto_log=True) # check the binlog players are running and exporting vars self.check_destination_master(shard_2_master, ['test_keyspace/80-']) self.check_destination_master(shard_3_master, ['test_keyspace/80-']) # check that binlog server exported the stats vars self.check_binlog_server_vars(shard_1_slave1, horizontal=True) # Check that the throttler was enabled. self.check_throttler_service(shard_2_master.rpc_endpoint(), ['BinlogPlayer/0'], 9999) self.check_throttler_service(shard_3_master.rpc_endpoint(), ['BinlogPlayer/0'], 9999) # testing filtered replication: insert a bunch of data on shard 1, # check we get most of it after a few seconds, wait for binlog server # timeout, check we get all of it. logging.debug('Inserting lots of data on source shard') self._insert_lots(1000) logging.debug('Checking 80 percent of data is sent quickly') v = self._check_lots_timeout(1000, 80, 5) if v != 100: # small optimization: only do this check if we don't have all the data # already anyway. logging.debug('Checking all data goes through eventually') self._check_lots_timeout(1000, 100, 20) logging.debug('Checking no data was sent the wrong way') self._check_lots_not_present(1000) self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'], seconds_behind_master_max=30) self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'], seconds_behind_master_max=30) self.check_binlog_server_vars(shard_1_slave1, horizontal=True, min_statements=1000, min_transactions=1000) # use vtworker to compare the data (after health-checking the destination # rdonly tablets so discovery works) utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias]) logging.debug('Running vtworker SplitDiff') utils.run_vtworker([ '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-' ], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'], auto_log=True) utils.pause('Good time to test vtworker for diffs') # get status for destination master tablets, make sure we have it all self.check_running_binlog_player(shard_2_master, 4000, 2000) self.check_running_binlog_player(shard_3_master, 4000, 2000) # start a thread to insert data into shard_1 in the background # with current time, and monitor the delay insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 1, 10000, 0x9000000000000000) insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 2, 10001, 0xD000000000000000) monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low', 1) monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high', 2) # tests a failover switching serving to a different replica utils.run_vtctl( ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica']) utils.run_vtctl( ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare']) shard_1_slave2.wait_for_vttablet_state('SERVING') shard_1_slave1.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias]) # test data goes through again logging.debug('Inserting lots of data on source shard') self._insert_lots(1000, base=1000) 
    # test data goes through again
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000, base=1000)
    logging.debug('Checking 80 percent of data was sent quickly')
    self._check_lots_timeout(1000, 80, 5, base=1000)
    self.check_binlog_server_vars(shard_1_slave2, horizontal=True,
                                  min_statements=800, min_transactions=800)

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    expect_fail=True)

    # check query service is off on master 2 and master 3, as filtered
    # replication is enabled. Even the health check that is enabled on
    # master 3 should not interfere (we run it to be sure).
    utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias],
                    auto_log=True)
    for master in [shard_2_master, shard_3_master]:
      utils.check_tablet_query_service(self, master, False, False)
      stream_health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count',
                                            '1', master.tablet_alias])
      logging.debug('Got health: %s', str(stream_health))
      self.assertIn('realtime_stats', stream_health)
      self.assertNotIn('serving', stream_health)

    # check the destination master 3 is healthy, even though its query
    # service is not running (if not healthy this would exception out)
    shard_3_master.get_healthz()

    # now serve rdonly from the split shards, in test_nj only
    utils.run_vtctl(['MigrateServedTypes', '--cells=test_nj',
                     'test_keyspace/80-', 'rdonly'], auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-c0 c0-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')
    utils.check_srv_keyspace(
        'test_ny', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # now serve rdonly from the split shards, everywhere
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-c0 c0-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')
    utils.check_srv_keyspace(
        'test_ny', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-c0 c0-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # then serve replica from the split shards
    destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-c0 c0-\n'
        'Partitions(replica): -80 80-c0 c0-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)

    # move replica back and forth
    utils.run_vtctl(['MigrateServedTypes', '-reverse', 'test_keyspace/80-',
                     'replica'], auto_log=True)
    # After a backwards migration, queryservice should be enabled on
    # source and disabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, True, False)
    # Destination tablets would have query service disabled for other
    # reasons than the migration, so check the shard record instead of
    # the tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, False)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-c0 c0-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    # After a forwards migration, queryservice should be disabled on
    # source and enabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)
    # Destination tablets would have query service disabled for other
    # reasons than the migration, so check the shard record instead of
    # the tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-c0 c0-\n'
        'Partitions(replica): -80 80-c0 c0-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    # reparent shard_2 to shard_2_replica1, then insert more data and
    # see it flow through still
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/80-c0',
                     shard_2_replica1.tablet_alias])

    # update our test variables to point at the new master
    shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master

    logging.debug('Inserting lots of data on source shard after reparenting')
    self._insert_lots(3000, base=2000)
    logging.debug('Checking 80 percent of data was sent fairly quickly')
    self._check_lots_timeout(3000, 80, 10, base=2000)

    # use vtworker to compare the data again
    logging.debug('Running vtworker SplitDiff')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        'test_keyspace/c0-'], auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    # going to migrate the master now, check the delays
    monitor_thread_1.done = True
    monitor_thread_2.done = True
    insert_thread_1.done = True
    insert_thread_2.done = True
    logging.debug('DELAY 1: %s max_lag=%d ms avg_lag=%d ms',
                  monitor_thread_1.thread_name,
                  monitor_thread_1.max_lag_ms,
                  monitor_thread_1.lag_sum_ms / monitor_thread_1.sample_count)
    logging.debug('DELAY 2: %s max_lag=%d ms avg_lag=%d ms',
                  monitor_thread_2.thread_name,
                  monitor_thread_2.max_lag_ms,
                  monitor_thread_2.lag_sum_ms / monitor_thread_2.sample_count)

    # mock with the SourceShard records to test 'vtctl SourceShardDelete'
    # and 'vtctl SourceShardAdd'
    utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                    auto_log=True)
    utils.run_vtctl(['SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-',
                     '0', 'test_keyspace/80-'], auto_log=True)

    # then serve master from the split shards, make sure the source master's
    # query service is now turned off
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-c0 c0-\n'
        'Partitions(rdonly): -80 80-c0 c0-\n'
        'Partitions(replica): -80 80-c0 c0-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_2_master)
    self.check_no_binlog_player(shard_3_master)

    # delete the original tablets in the original shard
    tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
                         shard_1_ny_rdonly, shard_1_rdonly1])
    for t in [shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
              shard_1_rdonly1]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    utils.run_vtctl(['DeleteTablet', '-allow_master',
                     shard_1_master.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # test RemoveShardCell
    utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                    auto_log=True, expect_fail=True)
    utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                    auto_log=True)
    utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                    auto_log=True)
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
    self.assertNotIn('cells', shard)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

    # kill everything
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                         shard_2_master, shard_2_replica1, shard_2_replica2,
                         shard_2_rdonly1, shard_3_master, shard_3_replica,
                         shard_3_rdonly1])