def test_checksum_enabled(self):
    """Check that the update stream still works with binlog_checksum enabled.

    Records the current position of dst_replica, enables binlog_checksum on
    it, inserts a row through src_master and then looks for that exact
    statement in the update stream of dst_replica. The test returns early
    (without failing) when the flavor reports that it cannot enable
    binlog_checksum.
    """
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_enabled: starting @ %s', start_position)

    # Enable binlog_checksum, which will also force a log rotation that should
    # cause binlog streamer to notice the new checksum setting.
    if not mysql_flavor().enable_binlog_checksum(dst_replica):
        logging.debug(
            'skipping checksum test on flavor without binlog_checksum setting')
        return

    # Insert something and make sure it comes through intact.
    sql = (
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (19283, 1, 'testing checksum enabled') "
        "/* vtgate:: keyspace_id:00000001 */")
    src_master.mquery('vt_test_keyspace', sql, write=True)

    # Look for it using update stream to see if binlog streamer can talk to
    # dst_replica, which now has binlog_checksum enabled.
    stream = _get_update_stream(dst_replica)
    found = False
    try:
        for stream_event in stream.stream_update(start_position):
            # A POS event is reached without having seen our statement: stop.
            if stream_event.category == update_stream.StreamEvent.POS:
                break
            if stream_event.sql == sql:
                found = True
                break
    finally:
        # Close the stream even when reading from it raises, so a failure
        # here does not leave the connection open for later tests.
        stream.close()
    self.assertTrue(found, 'expected query not found in update stream')
def test_no_mysql_healthcheck(self):
    """Start vttablets without a mysql port while mysqld is down.

    The tablets must come up NOT_SERVING and unhealthy. After mysqld is
    restarted and replication is resumed, the tablets must report healthy
    and their tablet records must contain the mysql port.
    """
    both_tablets = (tablet_62344, tablet_62044)

    # Replication has to be configured so the slave tablet can get healthy.
    for each in both_tablets:
        each.create_db("vt_test_keyspace")
    master_pos = mysql_flavor().master_position(tablet_62344)
    change_master = mysql_flavor().change_master_commands(
        utils.hostname, tablet_62344.mysql_port, master_pos)
    replication_setup = (
        ["RESET MASTER", "RESET SLAVE"] + change_master + ["START SLAVE"])
    tablet_62044.mquery("", replication_setup)

    # Bring every mysqld down.
    utils.wait_procs([each.shutdown_mysql() for each in both_tablets])

    # Start the tablets; with mysqld gone they must be NOT_SERVING.
    tablet_62344.init_tablet("master", "test_keyspace", "0")
    tablet_62044.init_tablet(
        "spare", "test_keyspace", "0", include_mysql_port=False)
    for each in both_tablets:
        each.start_vttablet(
            wait_for_state=None,
            target_tablet_type="replica",
            full_mycnf_args=True,
            include_mysql_port=False)
    for each in both_tablets:
        each.wait_for_vttablet_state("NOT_SERVING")
        self.check_healthz(each, False)

    # Bring mysqld back up.
    utils.wait_procs([each.start_mysql() for each in both_tablets])

    # After a health check the master is expected to be healthy.
    health_check_master = ["RunHealthCheck", tablet_62344.tablet_alias,
                           "replica"]
    utils.run_vtctl(health_check_master, auto_log=True)
    self.check_healthz(tablet_62344, True)

    # The slave stays unhealthy for now because replication is not running.
    utils.run_vtctl(
        ["RunHealthCheck", tablet_62044.tablet_alias, "replica"],
        auto_log=True)
    self.check_healthz(tablet_62044, False)
    tablet_62044.wait_for_vttablet_state("NOT_SERVING")

    # Resume replication on the slave.
    tablet_62044.mquery("", ["START SLAVE"])

    # Now the slave should turn healthy and fix its mysql port.
    utils.run_vtctl(
        ["RunHealthCheck", tablet_62044.tablet_alias, "replica"],
        auto_log=True)
    tablet_62044.wait_for_vttablet_state("SERVING")
    self.check_healthz(tablet_62044, True)

    for each in both_tablets:
        # Poll the tablet record until the mysql port shows up.
        timeout = 10
        record = utils.run_vtctl_json(["GetTablet", each.tablet_alias])
        while "mysql" not in record["Portmap"]:
            timeout = utils.wait_step("mysql port in tablet record", timeout)
            record = utils.run_vtctl_json(["GetTablet", each.tablet_alias])
        self.assertEqual(record["Portmap"]["mysql"], each.mysql_port)

    # Clean up.
    tablet.kill_tablets([tablet_62344, tablet_62044])
def test_checksum_disabled(self):
    """Check that the update stream works with binlog_checksum disabled.

    Records the current position of dst_replica, disables binlog_checksum on
    it, inserts a row through src_master and then looks for that exact
    statement in the update stream of dst_replica.
    """
    # Disable binlog_checksum to make sure we can also talk to a server without
    # checksums enabled, in case they are enabled by default.
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_disabled: starting @ %s', start_position)

    # For flavors that don't support checksums, this is a no-op.
    mysql_flavor().disable_binlog_checksum(dst_replica)

    # Insert something and make sure it comes through intact.
    sql = (
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (58812, 1, 'testing checksum disabled') "
        "/* vtgate:: keyspace_id:00000001 */")
    src_master.mquery('vt_test_keyspace', sql, write=True)

    # Look for it using update stream to see if binlog streamer can talk to
    # dst_replica, which now has binlog_checksum disabled.
    stream = _get_update_stream(dst_replica)
    found = False
    try:
        for stream_event in stream.stream_update(start_position):
            # A POS event is reached without having seen our statement: stop.
            if stream_event.category == update_stream.StreamEvent.POS:
                break
            if stream_event.sql == sql:
                found = True
                break
    finally:
        # Close the stream even when reading from it raises, so a failure
        # here does not leave the connection open for later tests.
        stream.close()
    self.assertTrue(found, 'expected query not found in update stream')
def test_checksum_enabled(self):
    """Check that the update stream still works with binlog_checksum enabled.

    Records the current position of dst_replica, enables binlog_checksum on
    it, inserts a row through src_master and then looks for that exact
    statement in the update stream of dst_replica. The test returns early
    (without failing) when the flavor reports that it cannot enable
    binlog_checksum.
    """
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_enabled: starting @ %s', start_position)

    # Enable binlog_checksum, which will also force a log rotation that should
    # cause binlog streamer to notice the new checksum setting.
    if not mysql_flavor().enable_binlog_checksum(dst_replica):
        logging.debug(
            'skipping checksum test on flavor without binlog_checksum setting')
        return

    # Insert something and make sure it comes through intact.
    sql = ("INSERT INTO test_table (id, keyspace_id, msg) "
           "VALUES (19283, 1, 'testing checksum enabled') "
           "/* EMD keyspace_id:1 */")
    src_master.mquery("vt_test_keyspace", sql, write=True)

    # Look for it using update stream to see if binlog streamer can talk to
    # dst_replica, which now has binlog_checksum enabled.
    stream = _get_update_stream(dst_replica)
    stream.dial()
    found = False
    try:
        data = stream.stream_start(start_position)
        # A falsy reply ends the loop, as does the first POS event.
        while data:
            if data['Category'] == 'POS':
                break
            if data['Sql'] == sql:
                found = True
                break
            data = stream.stream_next()
    finally:
        # Close the dialed stream even when reading from it raises.
        stream.close()
    self.assertTrue(found, 'expected query not found in update stream')
def test_checksum_disabled(self):
    """Check that the update stream works with binlog_checksum disabled.

    Records the current position of dst_replica, disables binlog_checksum on
    it, inserts a row through src_master and then looks for that exact
    statement in the update stream of dst_replica.
    """
    # Disable binlog_checksum to make sure we can also talk to a server without
    # checksums enabled, in case they are enabled by default.
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_disabled: starting @ %s', start_position)

    # For flavors that don't support checksums, this is a no-op.
    mysql_flavor().disable_binlog_checksum(dst_replica)

    # Insert something and make sure it comes through intact.
    sql = ("INSERT INTO test_table (id, keyspace_id, msg) "
           "VALUES (58812, 1, 'testing checksum disabled') "
           "/* EMD keyspace_id:1 */")
    src_master.mquery("vt_test_keyspace", sql, write=True)

    # Look for it using update stream to see if binlog streamer can talk to
    # dst_replica, which now has binlog_checksum disabled.
    stream = _get_update_stream(dst_replica)
    stream.dial()
    found = False
    try:
        data = stream.stream_start(start_position)
        # A falsy reply ends the loop, as does the first POS event.
        while data:
            if data['Category'] == 'POS':
                break
            if data['Sql'] == sql:
                found = True
                break
            data = stream.stream_next()
    finally:
        # Close the dialed stream even when reading from it raises.
        stream.close()
    self.assertTrue(found, 'expected query not found in update stream')
def init_mysql(self, extra_my_cnf=None):
    """Initialize mysql for this tablet from the flavor's bootstrap archive.

    Args:
      extra_my_cnf: optional value forwarded unchanged as extra_my_cnf to
        the launcher.

    Returns:
      The result of self.mysqlctld(...) when self.use_mysqlctld is set,
      otherwise the result of self.mysqlctl(...) running 'init' with ports.
    """
    bootstrap_flags = ['-bootstrap_archive', mysql_flavor().bootstrap_archive()]
    if not self.use_mysqlctld:
        return self.mysqlctl(
            ['init'] + bootstrap_flags,
            extra_my_cnf=extra_my_cnf,
            with_ports=True)
    return self.mysqlctld(bootstrap_flags, extra_my_cnf=extra_my_cnf)
def test_no_mysql_healthcheck(self):
    """Start vttablets without a mysql port while mysqld is down.

    The tablets must come up NOT_SERVING. Once mysqld is restarted they must
    reach SERVING and their tablet records must contain the mysql port.
    """
    both_tablets = (tablet_62344, tablet_62044)

    # Replication has to be configured so the slave tablet can get healthy.
    for each in both_tablets:
        each.create_db('vt_test_keyspace')
    master_pos = mysql_flavor().master_position(tablet_62344)
    change_master = mysql_flavor().change_master_commands(
        utils.hostname, tablet_62344.mysql_port, master_pos)
    replication_setup = (
        ['RESET MASTER', 'RESET SLAVE'] + change_master + ['START SLAVE'])
    tablet_62044.mquery('', replication_setup)

    # Bring every mysqld down.
    utils.wait_procs([each.shutdown_mysql() for each in both_tablets])

    # Start the tablets; with mysqld gone they must be NOT_SERVING.
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet(
        'spare', 'test_keyspace', '0', include_mysql_port=False)
    for each in both_tablets:
        each.start_vttablet(
            wait_for_state=None,
            target_tablet_type='replica',
            full_mycnf_args=True,
            include_mysql_port=False)
    for each in both_tablets:
        each.wait_for_vttablet_state('NOT_SERVING')

    # Bring mysqld back up.
    utils.wait_procs([each.start_mysql() for each in both_tablets])

    # The tablets should now turn healthy and fix their mysql port.
    for each in both_tablets:
        each.wait_for_vttablet_state('SERVING')
    for each in both_tablets:
        # Poll the tablet record until the mysql port shows up.
        timeout = 10
        record = utils.run_vtctl_json(['GetTablet', each.tablet_alias])
        while 'mysql' not in record['Portmap']:
            timeout = utils.wait_step('mysql port in tablet record', timeout)
            record = utils.run_vtctl_json(['GetTablet', each.tablet_alias])
        self.assertEqual(record['Portmap']['mysql'], each.mysql_port)

    # Clean up.
    tablet.kill_tablets([tablet_62344, tablet_62044])
def test_charset(self):
    """Check that the connection charset is carried through replication.

    A non-ASCII value is inserted on src_master over a connection declared
    as latin1. After one update-stream event has been fetched for
    dst_replica through vtctl, the row read from dst_master must carry the
    same value.
    """
    origin = mysql_flavor().master_position(dst_replica)
    logging.debug('test_charset: starting @ %s', origin)

    # Vitess tablets default to utf8. The statement below is sent while
    # claiming to be latin1; if the binlog player does not make the same
    # claim, the value is written as utf8 and ends up different.
    insert_stmt = (
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (41523, 1, 'Šṛ́rỏé') /* vtgate:: keyspace_id:00000001 */")
    src_master.mquery(
        'vt_test_keyspace',
        insert_stmt,
        conn_params={'charset': 'latin1'},
        write=True)

    # Block until a single event past the starting position is available.
    vtctl_args = ['VtTabletUpdateStream',
                  '-position', origin,
                  '-count', '1',
                  dst_replica.tablet_alias]
    reply = utils.run_vtctl_json(vtctl_args)
    self.assertIn('event_token', reply)
    self.assertIn('timestamp', reply['event_token'])

    # Read the row back and compare the stored value.
    rows = dst_master.mquery(
        'vt_test_keyspace',
        'SELECT id, keyspace_id, msg FROM test_table WHERE id=41523 LIMIT 1')
    self.assertEqual(len(rows), 1, 'No data replicated.')
    first_row = rows[0]
    self.assertEqual(len(first_row), 3, 'Wrong number of columns.')
    self.assertEqual(first_row[2], 'Šṛ́rỏé',
                     'Data corrupted due to wrong charset.')
def test_log_rotation(self):
    """Check that the update stream copes with a binlog rotation.

    Flushes the master's logs, runs two transactions, then reads the vtgate
    update stream from the position recorded before the flush. The test
    passes as soon as an event reports a position after the starting one,
    and fails if two positioned events go by without that happening or if
    the stream ends first.
    """
    start_position = _get_master_current_position()
    logging.debug('test_log_rotation: starting @ %s', start_position)
    master_tablet.mquery('vt_test_keyspace', 'flush logs')
    self._exec_vt_txn(self._populate_vt_a(15))
    self._exec_vt_txn(['delete from vt_a'])
    master_conn = self._get_vtgate_stream_conn()
    master_txn_count = 0
    logs_correct = False
    try:
        for event, _ in master_conn.update_stream(
                'test_keyspace', topodata_pb2.MASTER,
                event=query_pb2.EventToken(shard='0',
                                           position=start_position),
                shard='0'):
            # Only events that carry a position count as transactions.
            if event.event_token.position:
                master_txn_count += 1
                position = event.event_token.position
                if mysql_flavor().position_after(position, start_position):
                    logs_correct = True
                    logging.debug('Log rotation correctly interpreted')
                    break
                if master_txn_count == 2:
                    self.fail('ran out of logs')
        if not logs_correct:
            self.fail("Flush logs didn't get properly interpreted")
    finally:
        # self.fail() raises, so the connection is closed here to make sure
        # it is released on the failure paths as well.
        master_conn.close()
def test_charset(self):
    """Check that the connection charset is carried through replication.

    A non-ASCII value is inserted on src_master over a connection declared
    as latin1. After the update stream of dst_replica has delivered a POS
    event, the row read from dst_master must carry the same value.
    """
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_charset: starting @ %s', start_position)

    # Insert something that will replicate incorrectly if the charset is not
    # propagated through binlog streamer to the destination.
    #
    # Vitess tablets default to using utf8, so we insert something crazy and
    # pretend it's latin1. If the binlog player doesn't also pretend it's
    # latin1, it will be inserted as utf8, which will change its value.
    src_master.mquery(
        'vt_test_keyspace',
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (41523, 1, 'Šṛ́rỏé') /* vtgate:: keyspace_id:00000001 */",
        conn_params={'charset': 'latin1'}, write=True)

    # Wait for it to replicate.
    stream = _get_update_stream(dst_replica)
    try:
        for stream_event in stream.stream_update(start_position):
            if stream_event.category == update_stream.StreamEvent.POS:
                break
    finally:
        # Close the stream even when reading from it raises.
        stream.close()

    # Check the value.
    data = dst_master.mquery(
        'vt_test_keyspace',
        'SELECT id, keyspace_id, msg FROM test_table WHERE id=41523 LIMIT 1')
    self.assertEqual(len(data), 1, 'No data replicated.')
    self.assertEqual(len(data[0]), 3, 'Wrong number of columns.')
    self.assertEqual(data[0][2], 'Šṛ́rỏé',
                     'Data corrupted due to wrong charset.')
def mysqlctl(self, cmd, extra_my_cnf=None, with_ports=False, verbose=False):
    """Run mysqlctl in the background for this tablet.

    Args:
      cmd: list of arguments appended after the generated flags.
      extra_my_cnf: optional entry added to EXTRA_MY_CNF after the
        flavor-specific one.
      with_ports: when true, -port and -mysql_port are passed.
      verbose: when true, -alsologtostderr is passed.

    Returns:
      The result of utils.run_bg for the assembled command line.
    """
    cnf_entries = [
        entry
        for entry in (mysql_flavor().extra_my_cnf(), extra_my_cnf)
        if entry
    ]
    # EXTRA_MY_CNF is only exported when there is something to put in it.
    extra_env = None
    if cnf_entries:
        extra_env = {'EXTRA_MY_CNF': ':'.join(cnf_entries)}

    command = environment.binary_args('mysqlctl') + [
        '-log_dir', environment.vtlogroot,
        '-tablet_uid', str(self.tablet_uid),
    ]
    if with_ports:
        command += ['-port', str(self.port)]
        command += ['-mysql_port', str(self.mysql_port)]
    if verbose:
        command += ['-alsologtostderr']
    command += cmd
    return utils.run_bg(command, extra_env=extra_env)
def test_charset(self):
    """Check that the connection charset is carried through replication.

    A non-ASCII value is inserted on src_master over a connection declared
    as latin1. After one update-stream event has been fetched for
    dst_replica through vtctl, the row read from dst_master must carry the
    same value.
    """
    origin = mysql_flavor().master_position(dst_replica)
    logging.debug('test_charset: starting @ %s', origin)

    # Vitess tablets default to utf8. The statement below is sent while
    # claiming to be latin1; if the binlog player does not make the same
    # claim, the value is written as utf8 and ends up different.
    insert_stmt = (
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (41523, 1, 'Šṛ́rỏé') /* vtgate:: keyspace_id:00000001 */")
    latin1_params = {'charset': 'latin1'}
    src_master.mquery('vt_test_keyspace', insert_stmt,
                      conn_params=latin1_params, write=True)

    # Block until a single event past the starting position is available.
    reply = utils.run_vtctl_json(
        ['VtTabletUpdateStream', '-position', origin, '-count', '1',
         dst_replica.tablet_alias])
    self.assertIn('event_token', reply)
    self.assertIn('timestamp', reply['event_token'])

    # Read the row back and compare the stored value.
    select_stmt = (
        'SELECT id, keyspace_id, msg FROM test_table WHERE id=41523 LIMIT 1')
    rows = dst_master.mquery('vt_test_keyspace', select_stmt)
    self.assertEqual(len(rows), 1, 'No data replicated.')
    first_row = rows[0]
    self.assertEqual(len(first_row), 3, 'Wrong number of columns.')
    self.assertEqual(first_row[2], 'Šṛ́rỏé',
                     'Data corrupted due to wrong charset.')
def test_charset(self):
    """Check that the connection charset is carried through replication.

    A non-ASCII value is inserted on src_master over a connection declared
    as latin1. After the update stream of dst_replica has delivered a POS
    event (or ended), the row read from dst_master must carry the same
    value.
    """
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_charset: starting @ %s', start_position)

    # Insert something that will replicate incorrectly if the charset is not
    # propagated through binlog streamer to the destination.
    #
    # Vitess tablets default to using utf8, so we insert something crazy and
    # pretend it's latin1. If the binlog player doesn't also pretend it's
    # latin1, it will be inserted as utf8, which will change its value.
    src_master.mquery(
        "vt_test_keyspace",
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (41523, 1, 'Šṛ́rỏé') /* EMD keyspace_id:1 */",
        conn_params={'charset': 'latin1'}, write=True)

    # Wait for it to replicate.
    stream = _get_update_stream(dst_replica)
    stream.dial()
    try:
        data = stream.stream_start(start_position)
        # A falsy reply ends the loop, as does the first POS event.
        while data:
            if data['Category'] == 'POS':
                break
            data = stream.stream_next()
    finally:
        # Close the dialed stream even when reading from it raises.
        stream.close()

    # Check the value.
    data = dst_master.mquery(
        "vt_test_keyspace",
        "SELECT id, keyspace_id, msg FROM test_table WHERE id=41523 LIMIT 1")
    self.assertEqual(len(data), 1, 'No data replicated.')
    self.assertEqual(len(data[0]), 3, 'Wrong number of columns.')
    self.assertEqual(data[0][2], 'Šṛ́rỏé',
                     'Data corrupted due to wrong charset.')
def wait_for_replication_pos(tablet_a, tablet_b, timeout=60.0):
    """Block until tablet B has reached tablet A's replication position.

    The position of tablet A is read once up front; tablet B is then polled
    through wait_step every 0.1 seconds. If tablet B does not catch up within
    timeout seconds, a TestError is raised.
    """
    target_pos = mysql_flavor().master_position(tablet_a)
    current_pos = mysql_flavor().master_position(tablet_b)
    while not mysql_flavor().position_at_least(current_pos, target_pos):
        waiting_for = (
            "%s's replication position to catch up %s's; currently at: %s, "
            "waiting to catch up to: %s"
        ) % (tablet_b.tablet_alias, tablet_a.tablet_alias,
             current_pos, target_pos)
        timeout = wait_step(waiting_for, timeout, sleep_time=0.1)
        current_pos = mysql_flavor().master_position(tablet_b)
def test_charset(self):
    """Check that the connection charset is carried through replication.

    A non-ASCII value is inserted on src_master over a connection declared
    as latin1. After the update stream of dst_replica has delivered an
    event that carries a position, the row read from dst_master must carry
    the same value.
    """
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_charset: starting @ %s', start_position)

    # Insert something that will replicate incorrectly if the charset is not
    # propagated through binlog streamer to the destination.
    #
    # Vitess tablets default to using utf8, so we insert something crazy and
    # pretend it's latin1. If the binlog player doesn't also pretend it's
    # latin1, it will be inserted as utf8, which will change its value.
    src_master.mquery(
        'vt_test_keyspace',
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (41523, 1, 'Šṛ́rỏé') /* vtgate:: keyspace_id:00000001 */",
        conn_params={'charset': 'latin1'}, write=True)

    # Wait for it to replicate.
    stream = _get_update_stream(dst_replica)
    try:
        for event in stream.stream_update('test_keyspace', '-',
                                          topodata_pb2.REPLICA,
                                          start_position):
            if event.event_token.position:
                break
    finally:
        # Close the stream even when reading from it raises.
        stream.close()

    # Check the value.
    data = dst_master.mquery(
        'vt_test_keyspace',
        'SELECT id, keyspace_id, msg FROM test_table WHERE id=41523 LIMIT 1')
    self.assertEqual(len(data), 1, 'No data replicated.')
    self.assertEqual(len(data[0]), 3, 'Wrong number of columns.')
    self.assertEqual(data[0][2], 'Šṛ́rỏé',
                     'Data corrupted due to wrong charset.')
def external_reparent(self):
    """Swap master and replica at the MySQL level, then tell Vitess.

    The module-level master is demoted, the replica is promoted once it has
    caught up, the old master is pointed at the new one, and finally
    TabletExternallyReparented is issued for the new master. The whole
    failover is stretched to last at least one second.
    """
    began_at = time.time()

    # Step 1: demote the current master.
    master.mquery('', mysql_flavor().demote_master_commands(), log_query=True)
    if master.semi_sync_enabled():
        master.set_semi_sync_enabled(master=False)

    # Step 2: let the replica catch up with the demoted master.
    utils.wait_for_replication_pos(master, replica)

    # Step 3: artificially prolong the failover to at least one second so
    # the buffer gets a chance to observe it.
    min_unavailability_s = 1
    elapsed = time.time() - began_at
    if elapsed < min_unavailability_s:
        remaining = min_unavailability_s - elapsed
        logging.debug(
            'Waiting for %.1f seconds because the failover was too fast'
            ' (took only %.3f seconds)', remaining, elapsed)
        time.sleep(remaining)

    # Step 4: promote the replica.
    replica.mquery('', mysql_flavor().promote_slave_commands(), log_query=True)
    if replica.semi_sync_enabled():
        replica.set_semi_sync_enabled(master=True)

    # Step 5: make the old master replicate from the new one.
    promoted_pos = mysql_flavor().master_position(replica)
    logging.debug('New master position: %s', str(promoted_pos))
    # 'localhost' is used as hostname because Travis CI worker hostnames are
    # too long for MySQL replication.
    repoint_cmds = mysql_flavor().change_master_commands(
        'localhost', replica.mysql_port, promoted_pos)
    master.mquery(
        '',
        ['RESET SLAVE'] + repoint_cmds + ['START SLAVE'],
        log_query=True)

    # Step 6: notify the new vttablet master about the reparent.
    utils.run_vtctl(
        ['TabletExternallyReparented', replica.tablet_alias],
        auto_log=True)
def get_all_extra_my_cnf(extra_my_cnf):
    """Collect the extra my.cnf entries to use.

    Args:
      extra_my_cnf: optional caller-supplied entry; skipped when falsy.

    Returns:
      A new list holding the flavor-specific entry (when the flavor has
      one) followed by extra_my_cnf (when given).
    """
    candidates = (mysql_flavor().extra_my_cnf(), extra_my_cnf)
    return [entry for entry in candidates if entry]
def wait_for_replication_pos(tablet_a, tablet_b, timeout=60.0):
    """Block until tablet B has reached tablet A's replication position.

    The position of tablet A is read once up front; tablet B is then polled
    through wait_step every 0.1 seconds. If tablet B does not catch up within
    timeout seconds, a TestError is raised.
    """
    target_pos = mysql_flavor().master_position(tablet_a)
    current_pos = mysql_flavor().master_position(tablet_b)
    while not mysql_flavor().position_at_least(current_pos, target_pos):
        waiting_for = (
            "%s's replication position to catch up %s's; "
            "currently at: %s, waiting to catch up to: %s"
        ) % (tablet_b.tablet_alias, tablet_a.tablet_alias,
             current_pos, target_pos)
        timeout = wait_step(waiting_for, timeout, sleep_time=0.1)
        current_pos = mysql_flavor().master_position(tablet_b)
def get_all_extra_my_cnf(extra_my_cnf):
    """Collect the extra my.cnf entries to use.

    Args:
      extra_my_cnf: optional caller-supplied entry; skipped when falsy.

    Returns:
      A new list that always starts with default-fast.cnf under
      environment.vttop, followed by the flavor-specific entry (when the
      flavor has one) and extra_my_cnf (when given).
    """
    default_cnf = environment.vttop + '/config/mycnf/default-fast.cnf'
    optional = (mysql_flavor().extra_my_cnf(), extra_my_cnf)
    return [default_cnf] + [entry for entry in optional if entry]
def test_checksum_disabled(self):
    """Check that the update stream works with binlog_checksum disabled.

    Checksums may be on by default, so they are switched off on dst_replica
    before a row is inserted through src_master; the statement must then be
    observed via self._wait_for_replica_event.
    """
    origin = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_disabled: starting @ %s', origin)

    # On flavors without checksum support this call does nothing.
    mysql_flavor().disable_binlog_checksum(dst_replica)

    # Write a row whose statement we can recognise in the stream.
    insert_stmt = ' '.join([
        "INSERT INTO test_table (id, keyspace_id, msg)",
        "VALUES (58812, 1, 'testing checksum disabled')",
        "/* vtgate:: keyspace_id:00000001 */",
    ])
    src_master.mquery('vt_test_keyspace', insert_stmt, write=True)

    # The statement must come through the update stream of dst_replica,
    # which now runs with binlog_checksum disabled.
    self._wait_for_replica_event(origin, insert_stmt)
def test_checksum_disabled(self):
    """Check that the update stream works with binlog_checksum disabled.

    Checksums may be on by default, so they are switched off on dst_replica
    before a row is inserted through src_master; the statement must then be
    observed via self._wait_for_replica_event.
    """
    origin = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_disabled: starting @ %s', origin)

    # On flavors without checksum support this call does nothing.
    mysql_flavor().disable_binlog_checksum(dst_replica)

    # Write a row whose statement we can recognise in the stream.
    statement_parts = (
        "INSERT INTO test_table (id, keyspace_id, msg)",
        "VALUES (58812, 1, 'testing checksum disabled')",
        "/* vtgate:: keyspace_id:00000001 */",
    )
    insert_stmt = ' '.join(statement_parts)
    src_master.mquery('vt_test_keyspace', insert_stmt, write=True)

    # The statement must come through the update stream of dst_replica,
    # which now runs with binlog_checksum disabled.
    self._wait_for_replica_event(origin, insert_stmt)
def external_reparent(self):
    """Swap master and replica at the MySQL level, then tell Vitess.

    The module-level master is demoted, the replica is promoted once it has
    caught up, the old master is pointed at the new one, and finally
    TabletExternallyReparented is issued for the new master. The whole
    failover is stretched to last at least one second.
    """
    began_at = time.time()

    # Step 1: demote the current master.
    demote_cmds = mysql_flavor().demote_master_commands()
    master.mquery('', demote_cmds, log_query=True)
    if master.semi_sync_enabled():
        master.set_semi_sync_enabled(master=False)

    # Step 2: let the replica catch up with the demoted master.
    utils.wait_for_replication_pos(master, replica)

    # Step 3: artificially prolong the failover to at least one second so
    # the buffer gets a chance to observe it.
    min_unavailability_s = 1
    elapsed = time.time() - began_at
    if elapsed < min_unavailability_s:
        remaining = min_unavailability_s - elapsed
        logging.debug(
            'Waiting for %.1f seconds because the failover was too fast'
            ' (took only %.3f seconds)',
            remaining, elapsed)
        time.sleep(remaining)

    # Step 4: promote the replica.
    promote_cmds = mysql_flavor().promote_slave_commands()
    replica.mquery('', promote_cmds, log_query=True)
    if replica.semi_sync_enabled():
        replica.set_semi_sync_enabled(master=True)

    # Step 5: make the old master replicate from the new one.
    promoted_pos = mysql_flavor().master_position(replica)
    logging.debug('New master position: %s', str(promoted_pos))
    # 'localhost' is used as hostname because Travis CI worker hostnames are
    # too long for MySQL replication.
    repoint_cmds = mysql_flavor().change_master_commands(
        'localhost', replica.mysql_port, promoted_pos)
    resync_cmds = ['RESET SLAVE'] + repoint_cmds + ['START SLAVE']
    master.mquery('', resync_cmds, log_query=True)

    # Step 6: notify the new vttablet master about the reparent.
    notify_args = ['TabletExternallyReparented', replica.tablet_alias]
    utils.run_vtctl(notify_args, auto_log=True)
def test_checksum_enabled(self):
    """Check that the update stream still works with binlog_checksum enabled.

    Checksums are switched on for dst_replica, a row is inserted through
    src_master, and the statement must then be observed via
    self._wait_for_replica_event. The test returns early (without failing)
    when the flavor reports that it cannot enable binlog_checksum.
    """
    origin = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_enabled: starting @ %s', origin)

    # Turning binlog_checksum on also forces a log rotation, which should
    # make the binlog streamer pick up the new checksum setting.
    checksum_enabled = mysql_flavor().enable_binlog_checksum(dst_replica)
    if not checksum_enabled:
        logging.debug(
            'skipping checksum test on flavor without binlog_checksum setting')
        return

    # Write a row whose statement we can recognise in the stream.
    insert_stmt = ' '.join([
        "INSERT INTO test_table (id, keyspace_id, msg)",
        "VALUES (19283, 1, 'testing checksum enabled')",
        "/* vtgate:: keyspace_id:00000001 */",
    ])
    src_master.mquery('vt_test_keyspace', insert_stmt, write=True)

    # The statement must come through the update stream of dst_replica,
    # which now runs with binlog_checksum enabled.
    self._wait_for_replica_event(origin, insert_stmt)
def test_checksum_enabled(self):
    """Check that the update stream still works with binlog_checksum enabled.

    Checksums are switched on for dst_replica, a row is inserted through
    src_master, and the statement must then be observed via
    self._wait_for_replica_event. The test returns early (without failing)
    when the flavor reports that it cannot enable binlog_checksum.
    """
    origin = mysql_flavor().master_position(dst_replica)
    logging.debug('test_checksum_enabled: starting @ %s', origin)

    # Turning binlog_checksum on also forces a log rotation, which should
    # make the binlog streamer pick up the new checksum setting.
    checksum_enabled = mysql_flavor().enable_binlog_checksum(dst_replica)
    if not checksum_enabled:
        skip_note = (
            'skipping checksum test on flavor without binlog_checksum setting')
        logging.debug(skip_note)
        return

    # Write a row whose statement we can recognise in the stream.
    statement_parts = (
        "INSERT INTO test_table (id, keyspace_id, msg)",
        "VALUES (19283, 1, 'testing checksum enabled')",
        "/* vtgate:: keyspace_id:00000001 */",
    )
    insert_stmt = ' '.join(statement_parts)
    src_master.mquery('vt_test_keyspace', insert_stmt, write=True)

    # The statement must come through the update stream of dst_replica,
    # which now runs with binlog_checksum enabled.
    self._wait_for_replica_event(origin, insert_stmt)
def test_log_rotation(self):
    """Check that the update stream copes with a binlog rotation.

    Flushes the master's logs, runs two transactions, then reads the update
    stream from the position recorded before the flush. The test passes as
    soon as the accumulated position is after the starting one, and fails
    if two POS events go by without that happening or if the stream ends
    first.
    """
    origin = _get_master_current_position()
    current = origin
    master_tablet.mquery("vt_test_keyspace", "flush logs")
    self._exec_vt_txn(self._populate_vt_a(15))
    self._exec_vt_txn(["delete from vt_a"])

    conn = self._get_master_stream_conn()
    seen_txns = 0
    rotation_ok = False
    for stream_event in conn.stream_update(origin):
        # Only POS events advance the position; skip everything else.
        if stream_event.category != update_stream.StreamEvent.POS:
            continue
        seen_txns += 1
        current = mysql_flavor().position_append(
            current, stream_event.transaction_id)
        if mysql_flavor().position_after(current, origin):
            rotation_ok = True
            logging.debug("Log rotation correctly interpreted")
            break
        if seen_txns == 2:
            self.fail("ran out of logs")
    if not rotation_ok:
        self.fail("Flush logs didn't get properly interpreted")
def wait_for_replication_pos(tablet_a, tablet_b, timeout=60.0):
    """Block until tablet B has reached tablet A's replication position.

    The position of tablet A is read once up front; tablet B is then polled
    through wait_step every 0.1 seconds.

    Args:
      tablet_a: tablet object whose position is the target.
      tablet_b: tablet object that has to catch up.
      timeout: time budget in seconds.

    Raises:
      TestError: tablet B did not catch up within timeout seconds.
    """
    target_pos = mysql_flavor().master_position(tablet_a)
    current_pos = mysql_flavor().master_position(tablet_b)
    while not mysql_flavor().position_at_least(current_pos, target_pos):
        waiting_for = (
            "%s's replication position to catch up %s's; "
            "currently at: %s, waiting to catch up to: %s"
        ) % (tablet_b.tablet_alias, tablet_a.tablet_alias,
             current_pos, target_pos)
        timeout = wait_step(waiting_for, timeout, sleep_time=0.1)
        current_pos = mysql_flavor().master_position(tablet_b)
def test_log_rotation(self):
    """Check that the update stream copes with a binlog rotation.

    Flushes the master's logs, runs two transactions, then reads the update
    stream from the position recorded before the flush. The test passes as
    soon as the accumulated position is after the starting one. It fails
    with an explicit message if more than two POS events go by without that
    happening, or if the stream stops returning events.
    """
    start_position = _get_master_current_position()
    position = start_position
    master_tablet.mquery('vt_test_keyspace', 'flush logs')
    self._exec_vt_txn(self._populate_vt_a(15))
    self._exec_vt_txn(['delete from vt_a'])
    master_conn = self._get_master_stream_conn()
    master_conn.dial()
    master_txn_count = 0
    logs_correct = False
    # The reply of stream_start() is inspected like any later reply instead
    # of being thrown away, and a falsy reply ends the loop so that the
    # failure below is reported rather than a TypeError on data[...].
    data = master_conn.stream_start(start_position)
    while data:
        if data['Category'] == 'POS':
            master_txn_count += 1
            position = mysql_flavor().position_append(
                position, data['GTIDField'])
            if mysql_flavor().position_after(position, start_position):
                logs_correct = True
                logging.debug('Log rotation correctly interpreted')
                break
            # Give up after the third POS event without reading further.
            if master_txn_count > 2:
                break
        data = master_conn.stream_next()
    if not logs_correct:
        self.fail("Flush logs didn't get properly interpreted")
def test_log_rotation(self):
    """Check that the update stream copes with a binlog rotation.

    Flushes the master's logs, runs two transactions, then reads the update
    stream from the position recorded before the flush. The test passes as
    soon as the accumulated position is after the starting one, and fails
    if two POS events go by without that happening or if the stream ends
    first.
    """
    origin = _get_master_current_position()
    current = origin
    master_tablet.mquery('vt_test_keyspace', 'flush logs')
    self._exec_vt_txn(self._populate_vt_a(15))
    self._exec_vt_txn(['delete from vt_a'])

    conn = self._get_master_stream_conn()
    seen_txns = 0
    rotation_ok = False
    for stream_event in conn.stream_update(origin):
        # Only POS events advance the position; skip everything else.
        if stream_event.category != update_stream.StreamEvent.POS:
            continue
        seen_txns += 1
        current = mysql_flavor().position_append(
            current, stream_event.transaction_id)
        if mysql_flavor().position_after(current, origin):
            rotation_ok = True
            logging.debug('Log rotation correctly interpreted')
            break
        if seen_txns == 2:
            self.fail('ran out of logs')
    if not rotation_ok:
        self.fail("Flush logs didn't get properly interpreted")
def mysqlctl(self, cmd, extra_my_cnf=None, with_ports=False, verbose=False):
    """Run mysqlctl in the background for this tablet.

    Args:
      cmd: list of arguments appended after the generated flags.
      extra_my_cnf: optional entry added to EXTRA_MY_CNF after the
        flavor-specific one.
      with_ports: when true, -port and -mysql_port are passed.
      verbose: when true, -alsologtostderr is passed.

    Returns:
      The result of utils.run_bg for the assembled command line.
    """
    cnf_entries = [
        entry
        for entry in (mysql_flavor().extra_my_cnf(), extra_my_cnf)
        if entry
    ]
    # EXTRA_MY_CNF is only exported when there is something to put in it.
    if cnf_entries:
        extra_env = {'EXTRA_MY_CNF': ':'.join(cnf_entries)}
    else:
        extra_env = None

    fixed_flags = [
        '-log_dir', environment.vtlogroot,
        '-tablet_uid', str(self.tablet_uid),
    ]
    command = environment.binary_args('mysqlctl') + fixed_flags
    if with_ports:
        command += ['-port', str(self.port),
                    '-mysql_port', str(self.mysql_port)]
    if verbose:
        command += ['-alsologtostderr']
    command += cmd
    return utils.run_bg(command, extra_env=extra_env)
def test_log_rotation(self):
    """Check that the update stream copes with a binlog rotation.

    Flushes the master's logs, runs two transactions, then reads the update
    stream from the position recorded before the flush. The test passes as
    soon as an event reports a position after the starting one, and fails
    if two positioned events go by without that happening or if the stream
    ends first.
    """
    start_position = _get_master_current_position()
    logging.debug("test_log_rotation: starting @ %s", start_position)
    master_tablet.mquery("vt_test_keyspace", "flush logs")
    self._exec_vt_txn(self._populate_vt_a(15))
    self._exec_vt_txn(["delete from vt_a"])
    master_conn = self._get_master_stream_conn()
    master_txn_count = 0
    logs_correct = False
    try:
        for event in master_conn.stream_update("test_keyspace", "0",
                                               topodata_pb2.MASTER,
                                               position=start_position):
            # Only events that carry a position count as transactions.
            if event.event_token.position:
                master_txn_count += 1
                position = event.event_token.position
                if mysql_flavor().position_after(position, start_position):
                    logs_correct = True
                    logging.debug("Log rotation correctly interpreted")
                    break
                if master_txn_count == 2:
                    self.fail("ran out of logs")
        if not logs_correct:
            self.fail("Flush logs didn't get properly interpreted")
    finally:
        # self.fail() raises, so the connection is closed here to make sure
        # it is released on the failure paths as well.
        master_conn.close()
def set_semi_sync_enabled(self, master=None, slave=None):
    """Apply the flavor's semi-sync commands to this tablet's mysql.

    Args:
      master: value passed through as the master-side setting.
      slave: value passed through as the slave-side setting.
    """
    logging.debug(
        'mysql(%s): setting semi-sync mode: master=%s, slave=%s',
        self.tablet_uid, master, slave)
    semi_sync_cmds = mysql_flavor().set_semi_sync_enabled_commands(
        master, slave)
    self.mquery('', semi_sync_cmds)
def _get_repl_current_position():
    """Return the position the current flavor reports for replica_tablet."""
    flavor = mysql_flavor()
    return flavor.master_position(replica_tablet)
def reset_replication(self):
    """Run the flavor's reset-replication commands on this tablet's mysql."""
    reset_cmds = mysql_flavor().reset_replication_commands()
    self.mquery('', reset_cmds)
def test_no_mysql_healthcheck(self):
  """Start vttablets while mysqld is down and check that they recover.

  The tablets are started without a mysql port while mysqld is stopped and
  must come up NOT_SERVING and unhealthy. mysqld is then restarted and the
  tablets must become healthy and publish their mysql port.
  """
  both = (tablet_62344, tablet_62044)

  # Replication has to be configured, otherwise the slave tablet can never
  # be healthy.
  for tab in both:
    tab.create_db('vt_test_keyspace')
  master_pos = mysql_flavor().master_position(tablet_62344)
  # Use 'localhost' as hostname because Travis CI worker hostnames
  # are too long for MySQL replication.
  point_at_master = mysql_flavor().change_master_commands(
      'localhost', tablet_62344.mysql_port, master_pos)
  tablet_62044.mquery(
      '',
      ['RESET MASTER', 'RESET SLAVE'] + point_at_master + ['START SLAVE'])

  # Stop every mysqld.
  utils.wait_procs([
      tablet_62344.shutdown_mysql(),
      tablet_62044.shutdown_mysql(),
  ])

  # Start the tablets; with mysqld gone they must end up NOT_SERVING.
  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  tablet_62044.init_tablet('spare', 'test_keyspace', '0',
                           include_mysql_port=False)
  for tab in both:
    tab.start_vttablet(wait_for_state=None,
                       target_tablet_type='replica',
                       full_mycnf_args=True,
                       include_mysql_port=False)
  for tab in both:
    tab.wait_for_vttablet_state('NOT_SERVING')
    self.check_healthz(tab, False)

  # Bring mysqld back.
  utils.wait_procs([
      tablet_62344.start_mysql(),
      tablet_62044.start_mysql(),
  ])

  # The master should still be healthy.
  utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias, 'replica'],
                  auto_log=True)
  self.check_healthz(tablet_62344, True)

  # The slave is not healthy yet because replication is not running.
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
                  auto_log=True)
  self.check_healthz(tablet_62044, False)
  tablet_62044.wait_for_vttablet_state('NOT_SERVING')

  # Restart replication.
  tablet_62044.mquery('', ['START SLAVE'])

  # Wait for the tablet to become healthy and fix its mysql port.
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
                  auto_log=True)
  tablet_62044.wait_for_vttablet_state('SERVING')
  self.check_healthz(tablet_62044, True)

  for tab in both:
    # Poll the tablet record until the mysql port shows up.
    remaining = 10
    while True:
      record = utils.run_vtctl_json(['GetTablet', tab.tablet_alias])
      if 'mysql' in record['port_map']:
        break
      remaining = utils.wait_step('mysql port in tablet record', remaining)
    self.assertEqual(record['port_map']['mysql'], tab.mysql_port)

  # All done.
  tablet.kill_tablets([tablet_62344, tablet_62044])
def _get_master_current_position():
  """Return the position the MySQL flavor reports for master_tablet."""
  flavor = mysql_flavor()
  return flavor.master_position(master_tablet)
def init_mysql(self, extra_my_cnf=None):
  """Run 'mysqlctl init' for this tablet using the flavor's bootstrap archive.

  Args:
    extra_my_cnf: optional extra my.cnf entry, forwarded to self.mysqlctl.

  Returns:
    The result of self.mysqlctl for the init command.
  """
  init_cmd = ['init', '-bootstrap_archive', mysql_flavor().bootstrap_archive()]
  return self.mysqlctl(init_cmd, extra_my_cnf=extra_my_cnf, with_ports=True)
def setUpModule():
  """Create the three test tablets and start password-protected mysqlds.

  Writes a db-credentials JSON file, probes a throwaway mysqld to find out
  which mysql.user column holds passwords, generates an init_db.sql that
  sets passwords for all users, and initializes mysql for the master and
  both replicas with that file. Any failure triggers tearDownModule() before
  the exception is re-raised.
  """
  global new_init_db, db_credentials_file
  global tablet_master, tablet_replica1, tablet_replica2

  def new_tablet():
    return tablet.Tablet(use_mysqlctld=use_mysqlctld,
                         vt_dba_passwd='VtDbaPass')

  tablet_master = new_tablet()
  tablet_replica1 = new_tablet()
  tablet_replica2 = new_tablet()

  try:
    environment.topo_server().setup()

    db_credentials_file = environment.tmproot + '/db_credentials.json'
    with open(db_credentials_file, 'w') as creds_out:
      json.dump({
          'vt_dba': ['VtDbaPass'],
          'vt_app': ['VtAppPass'],
          'vt_allprivs': ['VtAllprivsPass'],
          'vt_repl': ['VtReplPass'],
          'vt_filtered': ['VtFilteredPass'],
      }, creds_out)

    # Determine which column is used for user passwords in this MySQL
    # version, using a temporary mysqld on the master tablet.
    probe_proc = tablet_master.init_mysql()
    if use_mysqlctld:
      tablet_master.wait_for_mysqlctl_socket()
    else:
      utils.wait_procs([probe_proc])
    password_col = 'password'
    try:
      tablet_master.mquery('mysql', 'select password from mysql.user limit 0',
                           user='******')
    except MySQLdb.DatabaseError:
      # The probe query failed, so fall back to the other column name.
      password_col = 'authentication_string'
    utils.wait_procs([tablet_master.teardown_mysql()])
    tablet_master.remove_tree(ignore_options=True)

    # Create a new init_db.sql file that sets up passwords for all users.
    # Then we use a db-credentials-file with the passwords.
    new_init_db = environment.tmproot + '/init_db_with_passwords.sql'
    with open(environment.vttop + '/config/init_db.sql') as template:
      base_sql = template.read()
    with open(new_init_db, 'w') as sql_out:
      sql_out.write(base_sql)
      sql_out.write(mysql_flavor().change_passwords(password_col))

    # Start the mysql instances external to the test.
    three_tablets = (tablet_master, tablet_replica1, tablet_replica2)
    setup_procs = [
        tab.init_mysql(
            init_db=new_init_db,
            extra_args=['-db-credentials-file', db_credentials_file])
        for tab in three_tablets
    ]
    if use_mysqlctld:
      for tab in three_tablets:
        tab.wait_for_mysqlctl_socket()
    else:
      utils.wait_procs(setup_procs)
  except:
    # Deliberately catches everything: clean up, then re-raise unchanged.
    tearDownModule()
    raise
def _test_reparent_from_outside(self, brutal=False):
  """Reparent a shard outside of Vitess, then tell Vitess about it.

  A master and three slaves are started. Then:
  - one slave becomes the new master,
  - one slave is reparented to that new master,
  - one slave is left busted and dead in the water,
  and TabletExternallyReparented is called.

  Args:
    brutal: kills the old master first
  """
  all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]

  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # The databases must exist so the vttablets start, as they are serving.
  for tab in all_tablets:
    tab.create_db('vt_test_keyspace')

  # Every tablet starts as a replica: first the future master, then the
  # slaves used for testing reparenting.
  for tab in all_tablets:
    tab.init_tablet('replica', 'test_keyspace', '0', start=True,
                    wait_for_start=False)

  # Wait for all tablets to start.
  for tab in all_tablets:
    tab.wait_for_vttablet_state('NOT_SERVING')

  # Reparent as a starting point.
  init_master_cmd = ['InitShardMaster', '-force', 'test_keyspace/0',
                     tablet_62344.tablet_alias]
  utils.run_vtctl(init_master_cmd, auto_log=True)

  # Now manually reparent 1 out of 2 tablets:
  # 62044 will be the new master,
  # 31981 won't be re-parented, so it will be busted.

  # Shutdown the old master first.
  if not brutal:
    tablet_62344.mquery('', mysql_flavor().demote_master_commands())

    # Get the position of the old master and wait for the new one to
    # catch up.
    utils.wait_for_replication_pos(tablet_62344, tablet_62044)

  # Promote the new master.
  tablet_62044.mquery('', mysql_flavor().promote_slave_commands())
  new_pos = mysql_flavor().master_position(tablet_62044)
  logging.debug('New master position: %s', str(new_pos))
  # Use 'localhost' as hostname because Travis CI worker hostnames
  # are too long for MySQL replication.
  follow_new_master = mysql_flavor().change_master_commands(
      'localhost', tablet_62044.mysql_port, new_pos)

  # 62344 will now be a slave of 62044.
  tablet_62344.mquery(
      '',
      ['RESET MASTER', 'RESET SLAVE'] + follow_new_master + ['START SLAVE'])

  # 41983 will be a slave of 62044.
  tablet_41983.mquery(
      '', ['STOP SLAVE'] + follow_new_master + ['START SLAVE'])

  # In brutal mode, we kill the old master first and delete its tablet
  # record.
  if brutal:
    tablet_62344.kill_vttablet()
    utils.run_vtctl(
        ['DeleteTablet', '-allow_master', tablet_62344.tablet_alias],
        auto_log=True)

  base_time = time.time()

  # Update topology with the new server.
  utils.run_vtctl(
      ['TabletExternallyReparented', tablet_62044.tablet_alias],
      mode=utils.VTCTL_VTCTL, auto_log=True)

  self._test_reparent_from_outside_check(brutal, base_time)

  if not brutal:
    tablet_62344.kill_vttablet()
  tablet.kill_tablets([tablet_31981, tablet_62044, tablet_41983])
def _test_reparent_from_outside(self, brutal=False, fast=False):
  """This test will start a master and 3 slaves.

  Then:
  - one slave will be the new master
  - one slave will be reparented to that new master
  - one slave will be busted and dead in the water
  and we'll call TabletExternallyReparented.

  Args:
    brutal: scraps the old master first
    fast: starts every vttablet with -fast_external_reparent
  """
  all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]

  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # create the database so vttablets start, as they are serving
  for t in all_tablets:
    t.create_db('vt_test_keyspace')

  extra_args = ['-fast_external_reparent'] if fast else None

  # Start up a master mysql and vttablet
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True,
                           wait_for_start=False, extra_args=extra_args)

  # Create a few slaves for testing reparenting.
  for t in (tablet_62044, tablet_41983, tablet_31981):
    t.init_tablet('replica', 'test_keyspace', '0', start=True,
                  wait_for_start=False, extra_args=extra_args)

  # wait for all tablets to start
  for t in all_tablets:
    t.wait_for_vttablet_state('SERVING')

  # Reparent as a starting point
  for t in all_tablets:
    t.reset_replication()
  utils.run_vtctl(
      ['InitShardMaster', 'test_keyspace/0', tablet_62344.tablet_alias],
      auto_log=True)

  # now manually reparent 1 out of 2 tablets
  # 62044 will be the new master
  # 31981 won't be re-parented, so it will be busted
  tablet_62044.mquery('', mysql_flavor().promote_slave_commands())
  new_pos = mysql_flavor().master_position(tablet_62044)
  logging.debug('New master position: %s', str(new_pos))
  # Use 'localhost' rather than the machine hostname: long worker hostnames
  # are too long for MySQL replication, and every mysqld in this test runs
  # on the local machine.
  change_master_cmds = mysql_flavor().change_master_commands(
      'localhost', tablet_62044.mysql_port, new_pos)

  # 62344 will now be a slave of 62044
  tablet_62344.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                      change_master_cmds +
                      ['START SLAVE'])

  # 41983 will be a slave of 62044
  tablet_41983.mquery('', ['STOP SLAVE'] +
                      change_master_cmds +
                      ['START SLAVE'])

  # in brutal mode, we scrap the old master first
  if brutal:
    tablet_62344.scrap(force=True)
    # we have some automated tools that do this too, so it's good to simulate
    if environment.topo_server().flavor() == 'zookeeper':
      utils.run(
          environment.binary_args('zk') +
          ['rm', '-rf', tablet_62344.zk_tablet_path])

  # update zk with the new graph
  utils.run_vtctl(
      ['TabletExternallyReparented', tablet_62044.tablet_alias],
      mode=utils.VTCTL_VTCTL, auto_log=True)

  self._test_reparent_from_outside_check(brutal)

  # Run the same check again after rebuilding the replication graph.
  utils.run_vtctl(
      ['RebuildReplicationGraph', 'test_nj', 'test_keyspace'])

  self._test_reparent_from_outside_check(brutal)

  tablet.kill_tablets(
      [tablet_31981, tablet_62344, tablet_62044, tablet_41983])
def test_basic_recovery(self):
  """Test recovery from backup flow.

  test_recovery will:
  - create a shard with master and replica1 only
  - run InitShardMaster
  - insert some data
  - take a backup
  - insert more data on the master
  - create a recovery keyspace
  - bring up tablet_replica2 in the new keyspace
  - check that new tablet does not have data created after backup
  - check that vtgate queries work correctly
  """
  # Insert data on master, wait for replica to get it.
  utils.run_vtctl(
      ['ApplySchema', '-sql', self._create_vt_insert_test, 'test_keyspace'],
      auto_log=True)
  self._insert_data(tablet_master, 1)
  self._check_data(tablet_replica1, 1, 'replica1 tablet getting data')

  master_pos = mysql_flavor().master_position(tablet_master)
  # Backup the replica.
  utils.run_vtctl(['Backup', tablet_replica1.tablet_alias], auto_log=True)

  # Check that the backup shows up in the listing.
  backups = self._list_backups()
  logging.debug('list of backups: %s', backups)
  self.assertEqual(len(backups), 1)
  self.assertTrue(backups[0].endswith(tablet_replica1.tablet_alias))
  # Backup name is of format date.time.tablet_alias.
  name_parts = backups[0].split('.')
  expected_time = datetime.strptime(name_parts[0] + '.' + name_parts[1],
                                    '%Y-%m-%d.%H%M%S')

  # Insert more data on the master.
  self._insert_data(tablet_master, 2)

  utils.run_vtctl(
      ['ApplyVSchema', '-vschema', self._vschema_json, 'test_keyspace'],
      auto_log=True)

  vschema = utils.run_vtctl_json(['GetVSchema', 'test_keyspace'])
  logging.debug('test_keyspace vschema: %s', str(vschema))
  srv_keyspace = utils.run_vtctl_json(
      ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
  logging.debug('Serving keyspace before: %s', str(srv_keyspace))
  vschema = utils.run_vtctl_json(['GetSrvVSchema', 'test_nj'])
  logging.debug('Serving vschema before recovery: %s', str(vschema))

  # Now bring up the recovery keyspace with 1 tablet, letting it restore
  # from backup.
  self._restore(tablet_replica2, 'recovery_keyspace')

  vschema = utils.run_vtctl_json(['GetSrvVSchema', 'test_nj'])
  logging.debug('Serving vschema after recovery: %s', str(vschema))
  srv_keyspace = utils.run_vtctl_json(
      ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
  logging.debug('Serving keyspace after: %s', str(srv_keyspace))
  vschema = utils.run_vtctl_json(['GetVSchema', 'recovery_keyspace'])
  logging.debug('recovery_keyspace vschema: %s', str(vschema))

  # Check the new replica has only 1 row.
  self._check_data(tablet_replica2, 1,
                   'replica2 tablet should not have new data')

  # Check that the restored replica has the right local_metadata.
  rows = tablet_replica2.mquery('_vt', 'select * from local_metadata')
  metadata = {row[0]: row[1] for row in rows}
  self.assertEqual(metadata['Alias'], 'test_nj-0000062346')
  self.assertEqual(metadata['ClusterAlias'], 'recovery_keyspace.0')
  self.assertEqual(metadata['DataCenter'], 'test_nj')
  self.assertEqual(metadata['RestorePosition'], master_pos)
  logging.debug('RestoredBackupTime: %s',
                str(metadata['RestoredBackupTime']))
  restored_time = datetime.strptime(metadata['RestoredBackupTime'],
                                    '%Y-%m-%dT%H:%M:%SZ')
  self.assertEqual(restored_time, expected_time)

  # Update original 1st row in master.
  tablet_master.mquery(
      'vt_test_keyspace',
      "update vt_insert_test set msg='new msg' where id=1", write=True)

  # Verify that master has new value.
  rows = tablet_master.mquery(
      'vt_test_keyspace', 'select msg from vt_insert_test where id=1')
  self.assertEqual(rows[0][0], 'new msg')

  # Verify that restored replica has old value.
  rows = tablet_replica2.mquery(
      'vt_test_keyspace', 'select msg from vt_insert_test where id=1')
  self.assertEqual(rows[0][0], 'test 1')

  # Start vtgate.
  vtgate = utils.VtGate()
  vtgate.start(tablets=[tablet_master, tablet_replica1, tablet_replica2],
               tablet_types_to_wait='REPLICA')
  utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
  utils.vtgate.wait_for_endpoints('recovery_keyspace.0.replica', 1)

  vtgate_conn = get_connection()
  cursor = vtgate_conn.cursor(
      tablet_type='replica', keyspace=None, writable=True)

  def first_cell(query):
    """Run query through vtgate, fail on an empty result, return cell [0][0]."""
    cursor.execute(query, {})
    fetched = cursor.fetchall()
    if not fetched:
      self.fail('Result cannot be null')
    return fetched[0][0]

  # Check that vtgate doesn't route queries to new tablet.
  self.assertEqual(first_cell('select count(*) from vt_insert_test'), 2)
  self.assertEqual(
      first_cell('select msg from vt_insert_test where id=1'), 'new msg')

  # Check that new keyspace is accessible by using ks.table.
  self.assertEqual(
      first_cell('select count(*) from recovery_keyspace.vt_insert_test'), 1)
  self.assertEqual(
      first_cell(
          'select msg from recovery_keyspace.vt_insert_test where id=1'),
      'test 1')

  # Check that new keyspace is accessible with 'use ks'.
  cursor.execute('use recovery_keyspace@replica', {})
  self.assertEqual(first_cell('select count(*) from vt_insert_test'), 1)
  self.assertEqual(
      first_cell(
          'select msg from recovery_keyspace.vt_insert_test where id=1'),
      'test 1')

  # TODO check that new tablet is accessible with 'use ks:shard'
  # this currently does not work through the python client, though it works
  # from mysql client
  # cursor.execute('use recovery_keyspace:0@replica', {})
  # cursor.execute('select count(*) from vt_insert_test', {})
  # result = cursor.fetchall()
  # if not result:
  #   self.fail('Result cannot be null')
  # else:
  #   self.assertEqual(result[0][0], 1)

  vtgate_conn.close()
  tablet_replica2.kill_vttablet()
  vtgate.kill()
def set_semi_sync_enabled(self, master=None, slave=None):
  """Send the flavor's semi-sync commands to this tablet's mysqld.

  Args:
    master: master-side setting, handed unchanged to the flavor.
    slave: slave-side setting, handed unchanged to the flavor.
  """
  logging.debug('mysql(%s): setting semi-sync mode: master=%s, slave=%s',
                self.tablet_uid, master, slave)
  flavor_cmds = mysql_flavor().set_semi_sync_enabled_commands(master, slave)
  self.mquery('', flavor_cmds)
def setUpModule():
  """Start password-protected mysqlds for every tablet in all_mysql_tablets.

  Writes a db-credentials JSON file, probes a throwaway mysqld to find out
  which mysql.user column holds passwords, generates an init_db.sql that
  sets passwords for all users and adds '@127.0.0.1' accounts, initializes
  mysql with it, copies the mysql ports onto all_other_tablets and sets up
  the topo server. Any failure triggers tearDownModule() before the
  exception is re-raised.
  """
  global new_init_db, db_credentials_file

  try:
    credentials = {
        'vt_dba': ['VtDbaPass'],
        'vt_app': ['VtAppPass'],
        'vt_allprivs': ['VtAllprivsPass'],
        'vt_repl': ['VtReplPass'],
        'vt_filtered': ['VtFilteredPass'],
    }
    db_credentials_file = environment.tmproot + '/db_credentials.json'
    with open(db_credentials_file, 'w') as fd:
      fd.write(json.dumps(credentials))

    # Determine which column is used for user passwords in this MySQL version.
    proc = ks1_shard_master.init_mysql()
    utils.wait_procs([proc])
    try:
      ks1_shard_master.mquery('mysql',
                              'select password from mysql.user limit 0',
                              user='******')
      password_col = 'password'
    except MySQLdb.DatabaseError:
      password_col = 'authentication_string'
    utils.wait_procs([ks1_shard_master.teardown_mysql()])
    ks1_shard_master.remove_tree(ignore_options=True)

    # Create a new init_db.sql file that sets up passwords for all users.
    # Then we use a db-credentials-file with the passwords.
    new_init_db = environment.tmproot + '/init_db_with_passwords.sql'
    with open(environment.vttop + '/config/init_db.sql') as fd:
      init_db = fd.read()
    with open(new_init_db, 'w') as fd:
      fd.write(init_db)
      fd.write(mysql_flavor().change_passwords(password_col))
      # The passwords below must match the credentials file written above.
      # vt_allprivs was previously created with 'VtAllPrivsPass', which does
      # not match the 'VtAllprivsPass' entry in the credentials.
      fd.write('''
# connecting through a port requires 127.0.0.1
# --host=localhost will connect through socket
CREATE USER 'vt_dba'@'127.0.0.1' IDENTIFIED BY 'VtDbaPass';
GRANT ALL ON *.* TO 'vt_dba'@'127.0.0.1';
GRANT GRANT OPTION ON *.* TO 'vt_dba'@'127.0.0.1';

# User for app traffic, with global read-write access.
CREATE USER 'vt_app'@'127.0.0.1' IDENTIFIED BY 'VtAppPass';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,
  REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,
  LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,
  SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER
  ON *.* TO 'vt_app'@'127.0.0.1';

# User for administrative operations that need to be executed as non-SUPER.
# Same permissions as vt_app here.
CREATE USER 'vt_allprivs'@'127.0.0.1' IDENTIFIED BY 'VtAllprivsPass';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,
  REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,
  LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,
  SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER
  ON *.* TO 'vt_allprivs'@'127.0.0.1';

# User for Vitess filtered replication (binlog player).
# Same permissions as vt_app.
CREATE USER 'vt_filtered'@'127.0.0.1' IDENTIFIED BY 'VtFilteredPass';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,
  REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,
  LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,
  SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER
  ON *.* TO 'vt_filtered'@'127.0.0.1';

FLUSH PRIVILEGES;
''')
    setup_procs = [
        t.init_mysql(
            use_rbr=True, init_db=new_init_db,
            extra_args=['-db-credentials-file', db_credentials_file])
        for t in all_mysql_tablets
    ]
    utils.wait_procs(setup_procs)

    # Give each tablet in all_other_tablets the mysql port of the tablet at
    # the same index in all_mysql_tablets.
    for i, other in enumerate(all_other_tablets):
      other.mysql_port = all_mysql_tablets[i].mysql_port

    environment.topo_server().setup()
  except:
    # Deliberately catches everything: clean up, then re-raise unchanged.
    tearDownModule()
    raise
def _test_reparent_from_outside(self, brutal=False, fast=False):
  """This test will start a master and 3 slaves.

  Then:
  - one slave will be the new master
  - one slave will be reparented to that new master
  - one slave will be busted and dead in the water
  and we'll call TabletExternallyReparented.

  Args:
    brutal: scraps the old master first
    fast: starts every vttablet with -fast_external_reparent
  """
  all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]

  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # create the database so vttablets start, as they are serving
  for t in all_tablets:
    t.create_db('vt_test_keyspace')

  extra_args = ['-fast_external_reparent'] if fast else None

  # Start up a master mysql and vttablet
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True,
                           wait_for_start=False, extra_args=extra_args)

  # Create a few slaves for testing reparenting.
  for t in (tablet_62044, tablet_41983, tablet_31981):
    t.init_tablet('replica', 'test_keyspace', '0', start=True,
                  wait_for_start=False, extra_args=extra_args)

  # wait for all tablets to start
  for t in all_tablets:
    t.wait_for_vttablet_state('SERVING')

  # Reparent as a starting point
  for t in all_tablets:
    t.reset_replication()
  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/0',
                   tablet_62344.tablet_alias], auto_log=True)

  # now manually reparent 1 out of 2 tablets
  # 62044 will be the new master
  # 31981 won't be re-parented, so it will be busted
  tablet_62044.mquery('', mysql_flavor().promote_slave_commands())
  new_pos = mysql_flavor().master_position(tablet_62044)
  logging.debug('New master position: %s', str(new_pos))
  # Use 'localhost' rather than the machine hostname: long worker hostnames
  # are too long for MySQL replication, and every mysqld in this test runs
  # on the local machine.
  change_master_cmds = mysql_flavor().change_master_commands(
      'localhost', tablet_62044.mysql_port, new_pos)

  # 62344 will now be a slave of 62044
  tablet_62344.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                      change_master_cmds +
                      ['START SLAVE'])

  # 41983 will be a slave of 62044
  tablet_41983.mquery('', ['STOP SLAVE'] +
                      change_master_cmds +
                      ['START SLAVE'])

  # in brutal mode, we scrap the old master first
  if brutal:
    tablet_62344.scrap(force=True)
    # we have some automated tools that do this too, so it's good to simulate
    if environment.topo_server().flavor() == 'zookeeper':
      utils.run(environment.binary_args('zk') +
                ['rm', '-rf', tablet_62344.zk_tablet_path])

  # update zk with the new graph
  utils.run_vtctl(['TabletExternallyReparented', tablet_62044.tablet_alias],
                  mode=utils.VTCTL_VTCTL, auto_log=True)

  self._test_reparent_from_outside_check(brutal)

  # Run the same check again after rebuilding the replication graph.
  utils.run_vtctl(['RebuildReplicationGraph', 'test_nj', 'test_keyspace'])

  self._test_reparent_from_outside_check(brutal)

  tablet.kill_tablets([tablet_31981, tablet_62344, tablet_62044,
                       tablet_41983])
def test_no_mysql_healthcheck(self):
  """Start vttablets while mysqld is down and check that they recover.

  The tablets are started without a mysql port while mysqld is stopped and
  must come up NOT_SERVING and unhealthy. The slave is told (via StopSlave)
  not to repair replication, and that must survive a vttablet restart.
  After mysqld is restarted the slave must be SERVING with a reported lag of
  7200 until StartSlave brings the lag back under 30, and both tablets must
  publish their mysql port.
  """
  both = (tablet_62344, tablet_62044)

  # Replication has to be configured, otherwise the slave tablet can never
  # be healthy.
  for tab in both:
    tab.create_db('vt_test_keyspace')
  master_pos = mysql_flavor().master_position(tablet_62344)
  # Use 'localhost' as hostname because Travis CI worker hostnames
  # are too long for MySQL replication.
  point_at_master = mysql_flavor().change_master_commands(
      'localhost', tablet_62344.mysql_port, master_pos)
  tablet_62044.mquery(
      '',
      ['RESET MASTER', 'RESET SLAVE'] + point_at_master + ['START SLAVE'])

  # Stop every mysqld.
  utils.wait_procs([
      tablet_62344.shutdown_mysql(),
      tablet_62044.shutdown_mysql(),
  ])

  # Start the tablets; with mysqld gone they must end up NOT_SERVING.
  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  tablet_62044.init_tablet('replica', 'test_keyspace', '0',
                           include_mysql_port=False)
  for tab in both:
    # Since MySQL is down at this point and we want the tablet to start up
    # successfully, we have to use supports_backups=False.
    tab.start_vttablet(wait_for_state=None,
                       supports_backups=False,
                       full_mycnf_args=True,
                       include_mysql_port=False)
  for tab in both:
    tab.wait_for_vttablet_state('NOT_SERVING')
    self.check_healthz(tab, False)

  # Tell slave to not try to repair replication in healthcheck.
  # The StopSlave will ultimately fail because mysqld is not running,
  # but vttablet should remember that it's not supposed to fix replication.
  utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias], expect_fail=True)

  # The above notice to not fix replication should survive tablet restart.
  tablet_62044.kill_vttablet()
  tablet_62044.start_vttablet(wait_for_state='NOT_SERVING',
                              full_mycnf_args=True,
                              include_mysql_port=False,
                              supports_backups=False)

  # Bring mysqld back.
  utils.wait_procs([
      tablet_62344.start_mysql(),
      tablet_62044.start_mysql(),
  ])

  # The master should still be healthy.
  utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias],
                  auto_log=True)
  self.check_healthz(tablet_62344, True)

  # The slave will now be healthy, but report a very high replication
  # lag, because it can't figure out what it exactly is.
  slave_alias = tablet_62044.tablet_alias
  utils.run_vtctl(['RunHealthCheck', slave_alias], auto_log=True)
  tablet_62044.wait_for_vttablet_state('SERVING')
  self.check_healthz(tablet_62044, True)

  stream_health_cmd = ['VtTabletStreamHealth', '-count', '1', slave_alias]
  health = utils.run_vtctl_json(stream_health_cmd)
  self.assertIn('seconds_behind_master', health['realtime_stats'])
  self.assertEqual(health['realtime_stats']['seconds_behind_master'], 7200)
  self.assertIn('serving', health)

  # Restart replication, wait until health check goes small
  # (a value of zero is default and won't be in structure).
  utils.run_vtctl(['StartSlave', slave_alias])
  remaining = 10
  while True:
    utils.run_vtctl(['RunHealthCheck', slave_alias], auto_log=True)
    health = utils.run_vtctl_json(stream_health_cmd)
    if 'serving' in health:
      stats = health['realtime_stats']
      if ('seconds_behind_master' not in stats or
          stats['seconds_behind_master'] < 30):
        break
    remaining = utils.wait_step('health delay goes back down', remaining)

  # Wait for the tablets to fix their mysql port.
  for tab in both:
    # Poll the tablet record until the mysql port shows up.
    remaining = 10
    while True:
      record = utils.run_vtctl_json(['GetTablet', tab.tablet_alias])
      if 'mysql' in record['port_map']:
        break
      remaining = utils.wait_step('mysql port in tablet record', remaining)
    self.assertEqual(record['port_map']['mysql'], tab.mysql_port)

  # All done.
  tablet.kill_tablets([tablet_62344, tablet_62044])
def test_no_mysql_healthcheck(self):
  """This test starts a vttablet with no mysql port, while mysql is down.

  It makes sure vttablet will start properly and be unhealthy.
  Then we start mysql, and make sure vttablet becomes healthy.
  """
  both_tablets = (tablet_62344, tablet_62044)

  # we need replication to be enabled, so the slave tablet can be healthy.
  for t in both_tablets:
    t.create_db('vt_test_keyspace')
  pos = mysql_flavor().master_position(tablet_62344)
  # Use 'localhost' rather than the machine hostname: long worker hostnames
  # are too long for MySQL replication, and both mysqld instances in this
  # test run on the local machine.
  change_master_cmds = mysql_flavor().change_master_commands(
      'localhost', tablet_62344.mysql_port, pos)
  tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                      change_master_cmds +
                      ['START SLAVE'])

  # now shutdown all mysqld
  shutdown_procs = [
      tablet_62344.shutdown_mysql(),
      tablet_62044.shutdown_mysql(),
  ]
  utils.wait_procs(shutdown_procs)

  # start the tablets, wait for them to be NOT_SERVING (mysqld not there)
  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  tablet_62044.init_tablet('spare', 'test_keyspace', '0',
                           include_mysql_port=False)
  for t in both_tablets:
    t.start_vttablet(wait_for_state=None,
                     target_tablet_type='replica',
                     full_mycnf_args=True, include_mysql_port=False)
  for t in both_tablets:
    t.wait_for_vttablet_state('NOT_SERVING')
    self.check_healthz(t, False)

  # restart mysqld
  start_procs = [
      tablet_62344.start_mysql(),
      tablet_62044.start_mysql(),
  ]
  utils.wait_procs(start_procs)

  # the master should still be healthy
  utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias, 'replica'],
                  auto_log=True)
  self.check_healthz(tablet_62344, True)

  # the slave won't be healthy at first, as replication is not running
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
                  auto_log=True)
  self.check_healthz(tablet_62044, False)
  tablet_62044.wait_for_vttablet_state('NOT_SERVING')

  # restart replication
  tablet_62044.mquery('', ['START SLAVE'])

  # wait for the tablet to become healthy and fix its mysql port
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
                  auto_log=True)
  tablet_62044.wait_for_vttablet_state('SERVING')
  self.check_healthz(tablet_62044, True)

  for t in both_tablets:
    # wait for mysql port to show up
    timeout = 10
    while True:
      ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
      if 'mysql' in ti['Portmap']:
        break
      timeout = utils.wait_step('mysql port in tablet record', timeout)
    self.assertEqual(ti['Portmap']['mysql'], t.mysql_port)

  # all done
  tablet.kill_tablets([tablet_62344, tablet_62044])
def _test_reparent_from_outside(self, brutal=False):
  """Reparent a shard outside of Vitess, then tell Vitess about it.

  A master and three slaves are started. Then:
  - one slave becomes the new master,
  - one slave is reparented to that new master,
  - one slave is left busted and dead in the water,
  and TabletExternallyReparented is called.

  Args:
    brutal: kills the old master first
  """
  slaves = [tablet_62044, tablet_41983, tablet_31981]
  all_tablets = [tablet_62344] + slaves

  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # The databases must exist so the vttablets start, as they are serving.
  for tab in all_tablets:
    tab.create_db('vt_test_keyspace')

  # Start up a master mysql and vttablet.
  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True,
                           wait_for_start=False)

  # Create a few slaves for testing reparenting.
  for tab in slaves:
    tab.init_tablet('replica', 'test_keyspace', '0', start=True,
                    wait_for_start=False)

  # Wait for all tablets to start.
  for tab in all_tablets:
    tab.wait_for_vttablet_state('SERVING')

  # Reparent as a starting point.
  for tab in all_tablets:
    tab.reset_replication()
  init_master_cmd = ['InitShardMaster', 'test_keyspace/0',
                     tablet_62344.tablet_alias]
  utils.run_vtctl(init_master_cmd, auto_log=True)

  # Now manually reparent 1 out of 2 tablets:
  # 62044 will be the new master,
  # 31981 won't be re-parented, so it will be busted.
  tablet_62044.mquery('', mysql_flavor().promote_slave_commands())
  new_pos = mysql_flavor().master_position(tablet_62044)
  logging.debug('New master position: %s', str(new_pos))
  # Use 'localhost' as hostname because Travis CI worker hostnames
  # are too long for MySQL replication.
  follow_new_master = mysql_flavor().change_master_commands(
      'localhost', tablet_62044.mysql_port, new_pos)

  # 62344 will now be a slave of 62044.
  tablet_62344.mquery(
      '',
      ['RESET MASTER', 'RESET SLAVE'] + follow_new_master + ['START SLAVE'])

  # 41983 will be a slave of 62044.
  tablet_41983.mquery(
      '', ['STOP SLAVE'] + follow_new_master + ['START SLAVE'])

  # In brutal mode, we kill the old master first and delete its tablet
  # record.
  if brutal:
    tablet_62344.kill_vttablet()
    utils.run_vtctl(
        ['DeleteTablet', '-allow_master', tablet_62344.tablet_alias],
        auto_log=True)

  base_time = time.time()

  # Update topology with the new server.
  utils.run_vtctl(
      ['TabletExternallyReparented', tablet_62044.tablet_alias],
      mode=utils.VTCTL_VTCTL, auto_log=True)

  self._test_reparent_from_outside_check(brutal, base_time)

  # RebuildReplicationGraph will rebuild the topo data from
  # the tablet records. It is an emergency command only.
  utils.run_vtctl(['RebuildReplicationGraph', 'test_nj', 'test_keyspace'])

  self._test_reparent_from_outside_check(brutal, base_time)

  if not brutal:
    tablet_62344.kill_vttablet()
  tablet.kill_tablets([tablet_31981, tablet_62044, tablet_41983])