def test_service_disabled(self):
    # perform some inserts, then change state to stop the invalidator
    self.perform_insert(500)
    inv_before = self.replica_stats()['Totals']['Invalidations']
    invStats_before = self.replica_vars()
    utils.run_vtctl(['ChangeSlaveType', replica_tablet.tablet_alias, 'spare'])

    # wait until it's stopped
    timeout = 30
    while True:
      invStats_after = self.replica_vars()
      if invStats_after['RowcacheInvalidatorState'] == 'Stopped':
        break
      timeout = utils.wait_step(
          'RowcacheInvalidatorState, got %s expecting Stopped' %
          invStats_after['RowcacheInvalidatorState'], timeout, sleep_time=0.1)

    # check all data is right
    inv_after = self.replica_stats()['Totals']['Invalidations']
    invStats_after = self.replica_vars()
    logging.debug(
        'Tablet Replica->Spare\n\tBefore: Invalidations: %d InvalidatorStats '
        '%s\n\tAfter: Invalidations: %d InvalidatorStats %s',
        inv_before, invStats_before['RowcacheInvalidatorPosition'],
        inv_after, invStats_after['RowcacheInvalidatorPosition'])
    self.assertEqual(inv_after, 0,
                     'Row-cache invalid. should be disabled, no invalidations')
    self.assertEqual(invStats_after['RowcacheInvalidatorState'], 'Stopped',
                     'Row-cache invalidator should be disabled')

    # and restore the type
    utils.run_vtctl(
        ['ChangeSlaveType', replica_tablet.tablet_alias, 'replica'])
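The loop above leans on utils.wait_step to shrink a timeout budget on every retry and to fail once it runs out. The helper itself is not shown in these examples, so the following is only a minimal sketch of how a wait_step-style helper could behave (an assumption for illustration; the real utils.wait_step may differ):

import time

def wait_step(msg, timeout, sleep_time=1.0):
  # Sketch only (assumed behavior): sleep briefly, shrink the remaining
  # budget, and raise once it is exhausted so the surrounding `while True`
  # loop fails instead of spinning forever.
  timeout -= sleep_time
  if timeout <= 0:
    raise Exception('timed out waiting for %s' % msg)
  time.sleep(sleep_time)
  return timeout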
Example #2
  def test_restart(self):
    """test_restart tests that when starting a second vttablet with the same
    configuration as another one, it will kill the previous process
    and take over listening on the socket.

    If vttablet listens to other ports (like gRPC), this feature will
    break. We believe it is not widely used, so we're OK with this for now.
    (Container-based installations usually handle tablet restarts
    by using a different set of servers, and do not rely on this feature
    at all.)
    """
    if environment.topo_server().flavor() != 'zookeeper':
      logging.info("Skipping this test in non-github tree")
      return
    if tablet_62344.grpc_enabled():
      logging.info("Skipping this test as second gRPC port interferes")
      return

    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as it is serving
    tablet_62344.create_db('vt_test_keyspace')

    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    proc1 = tablet_62344.start_vttablet()
    proc2 = tablet_62344.start_vttablet()
    for timeout in xrange(20):
      logging.debug("Sleeping waiting for first process to die")
      time.sleep(1.0)
      proc1.poll()
      if proc1.returncode is not None:
        break
    if proc1.returncode is None:
      self.fail("proc1 still running")
    tablet_62344.kill_vttablet()
Example #3
 def test_stop_replication(self):
   utils.debug("===========test_stop_replication=========")
   utils.run_vtctl('ChangeSlaveType test_nj-0000062345 replica')
   time.sleep(10)
   perform_insert(100)
   master_position = utils.mysql_query(62344, 'vt_test_keyspace', 'show master status')
   # The sleep is needed here, so the invalidator can catch up and the number can be tested.
   replica_tablet.mquery('vt_test_keyspace', "select MASTER_POS_WAIT('%s', %d)" % (master_position[0][0], master_position[0][1]), 5)
   time.sleep(5)
   inv_count1 = framework.MultiDict(json.load(urllib2.urlopen("http://%s/debug/table_stats" % replica_host)))['Totals']['Invalidations']
   replica_tablet.mquery('vt_test_keyspace', "stop slave")
   perform_insert(100)
   # EOF is returned after 30s, sleeping a bit more to ensure we catch the EOF
   # and can test replication stop effectively.
   time.sleep(35)
   replica_tablet.mquery('vt_test_keyspace', "start slave")
   master_position = utils.mysql_query(62344, 'vt_test_keyspace', 'show master status')
   # The sleep is needed here, so the invalidator can catch up and the number can be tested.
   replica_tablet.mquery('vt_test_keyspace', "select MASTER_POS_WAIT('%s', %d)" % (master_position[0][0], master_position[0][1]), 5)
   time.sleep(10)
   invalidatorStats = framework.MultiDict(json.load(urllib2.urlopen("http://%s/debug/vars" % replica_host)))['CacheInvalidationProcessor']
   utils.debug("invalidatorStats %s" % invalidatorStats)
   inv_count2 = framework.MultiDict(json.load(urllib2.urlopen("http://%s/debug/table_stats" % replica_host)))['Totals']['Invalidations']
   utils.debug("invalidator count1 %d count2 %d" % (inv_count1, inv_count2))
   self.assertEqual(invalidatorStats["States"]["Current"], "Enabled", "Row-cache invalidator should be enabled")
   self.assertTrue(inv_count2 - inv_count1 > 0, "invalidator was able to restart after a small pause in replication")
Example #4
  def check_throttler_service_maxrates(self, throttler_server, names, rate):
    """Checks the vtctl ThrottlerMaxRates and ThrottlerSetRate commands."""
    # Avoid flakes by waiting for all throttlers. (Necessary because filtered
    # replication on vttablet will register the throttler asynchronously.)
    timeout_s = 10
    while True:
      stdout, _ = utils.run_vtctl(['ThrottlerMaxRates', '--server',
                                   throttler_server], auto_log=True,
                                  trap_output=True)
      if '%d active throttler(s)' % len(names) in stdout:
        break
      timeout_s = utils.wait_step('all throttlers registered', timeout_s)
    for name in names:
      self.assertIn('| %s | %d |' % (name, rate), stdout)
    self.assertIn('%d active throttler(s)' % len(names), stdout)

    # Check that it's possible to change the max rate on the throttler.
    new_rate = 'unlimited'
    stdout, _ = utils.run_vtctl(['ThrottlerSetMaxRate', '--server',
                                 throttler_server, new_rate],
                                auto_log=True, trap_output=True)
    self.assertIn('%d active throttler(s)' % len(names), stdout)
    stdout, _ = utils.run_vtctl(['ThrottlerMaxRates', '--server',
                                 throttler_server], auto_log=True,
                                trap_output=True)
    for name in names:
      self.assertIn('| %s | %s |' % (name, new_rate), stdout)
    self.assertIn('%d active throttler(s)' % len(names), stdout)
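Pieced together from the assertIn checks above, the ThrottlerMaxRates output is expected to contain one table row per registered throttler plus a summary line. A rough illustration with hypothetical throttler names and a rate of 9999 (these values are not taken from the example above):

expected_fragment = (
    '| BinlogPlayer/0 | 9999 |\n'
    '| BinlogPlayer/1 | 9999 |\n'
    '2 active throttler(s)\n')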
Example #5
def setup_unsharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', UNSHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', UNSHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])
  unsharded_master.init_tablet('master', keyspace=UNSHARDED_KEYSPACE, shard='0')
  unsharded_replica.init_tablet('replica', keyspace=UNSHARDED_KEYSPACE, shard='0')
  unsharded_rdonly.init_tablet('rdonly', keyspace=UNSHARDED_KEYSPACE, shard='0')

  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE,], auto_log=True)

  for t in [unsharded_master, unsharded_replica, unsharded_rdonly]:
    t.create_db('vt_test_keyspace_unsharded')
    t.mquery(unsharded_master.dbname, create_vt_insert_test)
    t.start_vttablet(wait_for_state=None)

  for t in [unsharded_master, unsharded_replica, unsharded_rdonly]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['ReparentShard', '-force', '%s/0' % UNSHARDED_KEYSPACE,
                   unsharded_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE],
                   auto_log=True)

  utils.check_srv_keyspace('test_nj', UNSHARDED_KEYSPACE,
                           'Partitions(master): -\n' +
                           'Partitions(rdonly): -\n' +
                           'Partitions(replica): -\n' +
                           'TabletTypes: master,rdonly,replica')
Example #6
  def _verify_vtctl_set_shard_tablet_control(self):
    """Test that manually editing the blacklisted tables works correctly.

    TODO(mberlin): This is more an integration test and should be moved to the
    Go codebase eventually.
    """
    # check 'vtctl SetShardTabletControl' command works as expected:
    # clear the rdonly entry:
    utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                     'rdonly'], auto_log=True)
    self._assert_tablet_controls([topodata_pb2.MASTER, topodata_pb2.REPLICA])

    # re-add rdonly:
    utils.run_vtctl(['SetShardTabletControl', '--tables=moving.*,view1',
                     'source_keyspace/0', 'rdonly'], auto_log=True)
    self._assert_tablet_controls([topodata_pb2.MASTER, topodata_pb2.REPLICA,
                                  topodata_pb2.RDONLY])

    # and then clear all entries:
    utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                     'rdonly'], auto_log=True)
    utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                     'replica'], auto_log=True)
    utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                     'master'], auto_log=True)
    shard_json = utils.run_vtctl_json(['GetShard', 'source_keyspace/0'])
    self.assertNotIn('tablet_controls', shard_json)
Example #7
 def test_service_switch(self):
     """tests the service switch from disable -> enable -> disable"""
     self._test_service_disabled()
     self._test_service_enabled()
     # The above tests leave the service in a disabled state, hence enabling it.
     utils.run_vtctl(["ChangeSlaveType", replica_tablet.tablet_alias, "replica"])
     utils.wait_for_tablet_type(replica_tablet.tablet_alias, tablet.Tablet.tablet_type_value["REPLICA"], 30)
Example #8
 def copy_schema_to_destination_shards(self):
   for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
     utils.run_vtctl(['CopySchemaShard',
                      '--exclude_tables', 'unrelated',
                      shard_rdonly1.tablet_alias,
                      keyspace_shard],
                     auto_log=True)
Example #9
  def _create_source_schema(self):
    create_table_template = '''create table %s(
id bigint not null,
msg varchar(64),
primary key (id),
index by_msg (msg)
) Engine=InnoDB'''
    create_view_template = 'create view %s(id, msg) as select id, msg from %s'

    for t in ['moving1', 'moving2', 'staying1', 'staying2']:
      utils.run_vtctl(['ApplySchema',
                       '-sql=' + create_table_template % (t),
                       'source_keyspace'],
                      auto_log=True)
    utils.run_vtctl(['ApplySchema',
                     '-sql=' + create_view_template % ('view1', 'moving1'),
                     'source_keyspace'],
                    auto_log=True)
    for t in [source_master, source_replica, source_rdonly1, source_rdonly2]:
      utils.run_vtctl(['ReloadSchema', t.tablet_alias])

    # Add a table to the destination keyspace which should be ignored.
    utils.run_vtctl(['ApplySchema',
                     '-sql=' + create_table_template % 'extra1',
                     'destination_keyspace'],
                    auto_log=True)
    for t in [destination_master, destination_replica,
              destination_rdonly1, destination_rdonly2]:
      utils.run_vtctl(['ReloadSchema', t.tablet_alias])
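For reference, the ApplySchema calls above submit create_table_template expanded with a table name; for 'moving1' the generated SQL looks like this (Python 2 print, shown only to make the -sql payload concrete):

print create_table_template % 'moving1'
# create table moving1(
# id bigint not null,
# msg varchar(64),
# primary key (id),
# index by_msg (msg)
# ) Engine=InnoDB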
Example #10
  def test_vtaction_dies_hard(self):
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as it is serving
    tablet_62344.create_db('vt_test_keyspace')

    tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

    # start a 'vtctl Sleep' command, don't wait for it
    action_path, _ = utils.run_vtctl(['-no-wait', 'Sleep', tablet_62344.tablet_alias, '60s'], trap_output=True)
    action_path = action_path.strip()

    # wait for the action to be 'Running', capture its pid
    timeout = 10
    while True:
      an = utils.run_vtctl_json(['ReadTabletAction', action_path])
      if an.get('State', None) == 'Running':
        pid = an['Pid']
        logging.info("Action is running with pid %u, good", pid)
        break
      timeout = utils.wait_step('sleep action to run', timeout)

    # let's kill it hard, wait until it's gone for good
    os.kill(pid, signal.SIGKILL)
    try:
      os.waitpid(pid, 0)
    except OSError:
      # this means the process doesn't exist any more, we're good
      pass

    # Then let's make sure the next action cleans up properly and can execute.
    # If that doesn't work, this will time out and the test will fail.
    utils.run_vtctl(['Ping', tablet_62344.tablet_alias])

    tablet_62344.kill_vttablet()
Example #11
  def test_vtgate_qps(self):
    # create the topology
    utils.run_vtctl('CreateKeyspace test_keyspace')
    t = tablet.Tablet(tablet_uid=1, cell="nj")
    t.init_tablet("master", "test_keyspace", "0")
    t.update_addrs()
    utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True)

    # start vtgate and the qps-er
    vtgate_proc, vtgate_port = utils.vtgate_start(
        extra_args=['-cpu_profile', os.path.join(environment.tmproot,
                                                 'vtgate.pprof')])
    qpser = utils.run_bg(environment.binary_args('zkclient2') + [
        '-server', 'localhost:%u' % vtgate_port,
        '-mode', 'qps',
        '-zkclient_cpu_profile', os.path.join(environment.tmproot, 'zkclient2.pprof'),
        'test_nj', 'test_keyspace'])
    qpser.wait()

    # get the vtgate vars, make sure we have what we need
    v = utils.get_vars(vtgate_port)

    # some checks on performance / stats
    rpcCalls = v['TopoReaderRpcQueryCount']['test_nj']
    if rpcCalls < MIN_QPS * 10:
      self.fail('QPS is too low: %u < %u' % (rpcCalls / 10, MIN_QPS))
    else:
      logging.debug("Recorded qps: %u", rpcCalls / 10)
    utils.vtgate_kill(vtgate_proc)
Example #12
  def test_sigterm(self):
    utils.run_vtctl('CreateKeyspace test_keyspace')

    # create the database so vttablets start, as it is serving
    tablet_62344.create_db('vt_test_keyspace')

    tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

    # start a 'vtctl Sleep' command in the background
    args = [environment.binary_path('vtctl'),
            '-log_dir', environment.vtlogroot,
            '--alsologtostderr']
    args.extend(environment.topo_server_flags())
    args.extend(environment.tablet_manager_protocol_flags())
    args.extend(['Sleep', tablet_62344.tablet_alias, '60s'])
    sp = utils.run_bg(args, stdout=PIPE, stderr=PIPE)

    # wait for it to start, and let's kill it
    time.sleep(4.0)
    utils.run(['pkill', 'vtaction'])
    out, err = sp.communicate()

    # check the vtctl command got the right remote error back
    if "vtaction interrupted by signal" not in err:
      self.fail("cannot find expected output in error: " + err)
    logging.debug("vtaction was interrupted correctly:\n" + err)

    tablet_62344.kill_vttablet()
Example #13
  def test_sigterm(self):
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as it is serving
    tablet_62344.create_db('vt_test_keyspace')

    tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

    # start a 'vtctl Sleep' command, don't wait for it
    action_path, _ = utils.run_vtctl(['-no-wait', 'Sleep', tablet_62344.tablet_alias, '60s'], trap_output=True)
    action_path = action_path.strip()

    # wait for the action to be 'Running', capture its pid
    timeout = 10
    while True:
      an = utils.run_vtctl_json(['ReadTabletAction', action_path])
      if an.get('State', None) == 'Running':
        pid = an['Pid']
        logging.info("Action is running with pid %u, good", pid)
        break
      timeout = utils.wait_step('sleep action to run', timeout)

    # let's kill the vtaction process with a regular SIGTERM
    os.kill(pid, signal.SIGTERM)

    # check the vtctl command got the right remote error back
    out, err = utils.run_vtctl(['WaitForAction', action_path], trap_output=True,
                               raise_on_error=False)
    if "vtaction interrupted by signal" not in err:
      self.fail("cannot find expected output in error: " + err)
    logging.debug("vtaction was interrupted correctly:\n" + err)

    tablet_62344.kill_vttablet()
Example #14
  def _test_vtctl_copyschemashard(self, source):
    # Apply initial schema to the whole keyspace before creating shard 2.
    self._apply_initial_schema()

    _setup_shard_2()

    try:
      # InitShardMaster creates the db, but there shouldn't be any tables yet.
      self._check_tables(shard_2_master, 0)
      self._check_tables(shard_2_replica1, 0)

      # Run the command twice to make sure it's idempotent.
      for _ in range(2):
        utils.run_vtctl(['CopySchemaShard',
                         source,
                         'test_keyspace/2'],
                        auto_log=True)

        # shard_2_master should look the same as the replica we copied from
        self._check_tables(shard_2_master, 4)
        utils.wait_for_replication_pos(shard_2_master, shard_2_replica1)
        self._check_tables(shard_2_replica1, 4)
        shard_0_schema = self._get_schema(shard_0_master.tablet_alias)
        shard_2_schema = self._get_schema(shard_2_master.tablet_alias)
        self.assertEqual(shard_0_schema, shard_2_schema)
    finally:
      _teardown_shard_2()
Example #15
  def check_stream_health_equals_binlog_player_vars(self, tablet_obj, count):
    """Checks the variables exported by streaming health check match vars.

    Args:
      tablet_obj: the tablet to check.
      count: number of binlog players to expect.
    """

    blp_stats = utils.get_vars(tablet_obj.port)
    self.assertEqual(blp_stats['BinlogPlayerMapSize'], count)

    # Enforce health check because it's not running by default as
    # tablets may not be started with it, or may not run it in time.
    utils.run_vtctl(['RunHealthCheck', tablet_obj.tablet_alias])
    stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                          '-count', '1',
                                          tablet_obj.tablet_alias])
    logging.debug('Got health: %s', str(stream_health))
    self.assertNotIn('serving', stream_health)
    self.assertIn('realtime_stats', stream_health)
    self.assertNotIn('health_error', stream_health['realtime_stats'])
    self.assertIn('binlog_players_count', stream_health['realtime_stats'])
    self.assertEqual(blp_stats['BinlogPlayerMapSize'],
                     stream_health['realtime_stats']['binlog_players_count'])
    self.assertEqual(blp_stats['BinlogPlayerSecondsBehindMaster'],
                     stream_health['realtime_stats'].get(
                         'seconds_behind_master_filtered_replication', 0))
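Taken together, the assertions above imply a stream-health record of roughly the following shape. The values below are hypothetical; only the keys and the absent fields are taken from the checks above:

stream_health_example = {
    'realtime_stats': {
        'binlog_players_count': 1,
        'seconds_behind_master_filtered_replication': 0,
        # no 'health_error' key is expected here
    },
    # no top-level 'serving' key is expected in this record
}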
Example #16
def test_multisnapshot_mysqlctl():
  populate = sum([[
    "insert into vt_insert_test_%s (msg) values ('test %s')" % (i, x)
    for x in xrange(4)] for i in range(6)], [])
  create = ['''create table vt_insert_test_%s (
id bigint auto_increment,
msg varchar(64),
primary key (id)
) Engine=InnoDB''' % i for i in range(6)]

  utils.zk_wipe()

  # Start up a master mysql and vttablet
  utils.run_vtctl('CreateKeyspace -force test_keyspace')

  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  utils.run_vtctl('RebuildShardGraph test_keyspace/0')
  utils.validate_topology()

  tablet_62344.populate('vt_test_keyspace', create,
                        populate)

  tablet_62344.start_vttablet()
  err = tablet_62344.mysqlctl('-port %u -mysql-port %u multisnapshot --tables=vt_insert_test_1,vt_insert_test_2,vt_insert_test_3 --spec=-0000000000000003- vt_test_keyspace id' % (tablet_62344.port, tablet_62344.mysql_port)).wait()
  if err != 0:
    raise utils.TestError('mysqlctl multisnapshot failed')
  if os.path.exists(os.path.join(utils.vtdataroot, 'snapshot/vt_0000062344/data/vt_test_keyspace-,0000000000000003/vt_insert_test_4.csv.gz')):
    raise utils.TestError("Table vt_insert_test_4 wasn't supposed to be dumped.")
  for kr in 'vt_test_keyspace-,0000000000000003', 'vt_test_keyspace-0000000000000003,':
    path = os.path.join(utils.vtdataroot, 'snapshot/vt_0000062344/data/', kr, 'vt_insert_test_1.0.csv.gz')
    with gzip.open(path) as f:
      if len(f.readlines()) != 2:
        raise utils.TestError("Data looks wrong in %s" % path)
Example #17
def start_tablets():
  global __tablets
  # start tablets
  for t in __tablets:
    t.start_vttablet(wait_for_state=None)

  # wait for them to come in serving state
  for t in __tablets:
    t.wait_for_vttablet_state('SERVING')

  # InitShardMaster for master tablets
  for t in __tablets:
    if t.tablet_type == 'master':
      utils.run_vtctl(['InitShardMaster', t.keyspace+'/'+t.shard,
                       t.tablet_alias], auto_log=True)

  for ks in topo_schema.keyspaces:
    ks_name = ks[0]
    ks_type = ks[1]
    utils.run_vtctl(['RebuildKeyspaceGraph', ks_name],
                     auto_log=True)
    if ks_type == shard_constants.RANGE_SHARDED:
      utils.check_srv_keyspace('test_nj', ks_name,
                               'Partitions(master): -80 80-\n'
                               'Partitions(rdonly): -80 80-\n'
                               'Partitions(replica): -80 80-\n')
Example #18
  def test_actions_and_timeouts(self):
    # Start up a master mysql and vttablet
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    utils.validate_topology()
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62344.start_vttablet()

    utils.run_vtctl(['Ping', tablet_62344.tablet_alias])

    # schedule long action in the background, sleep a little bit to make sure
    # it started to run
    args = (environment.binary_args('vtctl') +
            environment.topo_server().flags() +
            ['-tablet_manager_protocol',
             protocols_flavor().tablet_manager_protocol(),
             '-tablet_protocol', protocols_flavor().tabletconn_protocol(),
             '-log_dir', environment.vtlogroot,
             'Sleep', tablet_62344.tablet_alias, '10s'])
    bg = utils.run_bg(args)
    time.sleep(3)

    # try a frontend RefreshState that should timeout as the tablet is busy
    # running the other one
    _, stderr = utils.run_vtctl(
        ['-wait-time', '3s', 'RefreshState', tablet_62344.tablet_alias],
        expect_fail=True)
    self.assertIn(protocols_flavor().rpc_timeout_message(), stderr)

    # wait for the background vtctl
    bg.wait()

    tablet_62344.kill_vttablet()
Example #19
def run_test_sigterm():
  utils.zk_wipe()
  utils.run_vtctl('CreateKeyspace -force test_keyspace')

  # create the database so vttablets start, as it is serving
  tablet_62344.create_db('vt_test_keyspace')

  tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

  # start a 'vtctl Sleep' command in the background
  sp = utils.run_bg(utils.vtroot+'/bin/vtctl -logfile=/dev/null Sleep %s 60s' %
                    tablet_62344.tablet_alias,
                    stdout=PIPE, stderr=PIPE)

  # wait for it to start, and let's kill it
  time.sleep(2.0)
  utils.run(['pkill', 'vtaction'])
  out, err = sp.communicate()

  # check the vtctl command got the right remote error back
  if "vtaction interrupted by signal" not in err:
    raise utils.TestError("cannot find expected output in error:", err)
  utils.debug("vtaction was interrupted correctly:\n" + err)

  tablet_62344.kill_vttablet()
Example #20
 def set_up(self):
   try:
     environment.topo_server_setup()
     utils.wait_procs([t.init_mysql() for t in self.tablets])
     utils.run_vtctl(['CreateKeyspace', self.keyspace])
     utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', self.keyspace, 'keyspace_id', 'uint64'])
     for t in self.tablets:
       t.init_tablet(t.type, keyspace=self.keyspace, shard=t.shard)
     utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
     for t in self.tablets:
       t.create_db('vt_' + self.keyspace)
       t.mquery(t.dbname, create_table)
       t.start_vttablet(wait_for_state=None)
     for t in self.tablets:
       t.wait_for_vttablet_state('SERVING')
     for t in self.tablets:
       if t.type == "master":
         utils.run_vtctl(['ReparentShard', '-force', self.keyspace+'/'+t.shard, t.tablet_alias], auto_log=True)
     utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
     self.vtgate_server, self.vtgate_port = utils.vtgate_start()
     vtgate_client = zkocc.ZkOccConnection("localhost:%u" % self.vtgate_port, "test_nj", 30.0)
     topology.read_topology(vtgate_client)
   except:
     self.shutdown()
     raise
Example #21
  def _test_reparent_from_outside_check(self, brutal, base_time):

    # make sure the shard replication graph is fine
    shard_replication = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
                                              'test_keyspace/0'])
    hashed_nodes = {}
    for node in shard_replication['nodes']:
      key = node['tablet_alias']['cell']+'-'+str(node['tablet_alias']['uid'])
      hashed_nodes[key] = True
    logging.debug('Got shard replication nodes: %s', str(hashed_nodes))
    expected_nodes = {
        'test_nj-41983': True,
        'test_nj-62044': True,
        }
    if not brutal:
      expected_nodes['test_nj-62344'] = True
    self.assertEqual(expected_nodes, hashed_nodes,
                     'Got unexpected nodes: %s != %s' % (str(expected_nodes),
                                                         str(hashed_nodes)))

    # make sure the master status page says it's the master
    tablet_62044_master_status = tablet_62044.get_status()
    self.assertIn('Serving graph: test_keyspace 0 master',
                  tablet_62044_master_status)

    # make sure the master health stream says it's the master too
    # (health check is disabled on these servers, force it first)
    utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'])
    health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                   '-count', '1',
                                   tablet_62044.tablet_alias])
    self.assertEqual(health['target']['tablet_type'], topodata_pb2.MASTER)
    # have to compare the int version, or the rounding errors can break
    self.assertTrue(
        health['tablet_externally_reparented_timestamp'] >= int(base_time))
Example #22
def setup_tablets():
  # Start up a master mysql and vttablet
  logging.debug("Setting up tablets")
  utils.run_vtctl('CreateKeyspace test_keyspace')
  master_tablet.init_tablet('master', 'test_keyspace', '0')
  replica_tablet.init_tablet('replica', 'test_keyspace', '0')
  utils.run_vtctl('RebuildShardGraph test_keyspace/0')
  utils.validate_topology()
  master_tablet.create_db('vt_test_keyspace')
  replica_tablet.create_db('vt_test_keyspace')

  utils.run_vtctl('RebuildKeyspaceGraph test_keyspace')
  zkocc_server = utils.zkocc_start()

  master_tablet.start_vttablet()
  replica_tablet.start_vttablet()
  utils.run_vtctl('SetReadWrite ' + master_tablet.tablet_alias)
  utils.check_db_read_write(62344)

  for t in [master_tablet, replica_tablet]:
    t.reset_replication()
  utils.run_vtctl('ReparentShard -force test_keyspace/0 ' + master_tablet.tablet_alias, auto_log=True)

  # reset counter so tests don't assert
  tablet.Tablet.tablets_running = 0
  setup_schema()
  master_tablet.vquery("set vt_schema_reload_time=86400", path="test_keyspace/0")
  replica_tablet.vquery("set vt_schema_reload_time=86400", path="test_keyspace/0")
Example #23
  def init_tablet(self, tablet_type, keyspace, shard,
                  start=False, dbname=None, parent=True, wait_for_start=True,
                  include_mysql_port=True, **kwargs):
    self.tablet_type = tablet_type
    self.keyspace = keyspace
    self.shard = shard

    self.dbname = dbname or ('vt_' + self.keyspace)

    args = ['InitTablet',
            '-hostname', 'localhost',
            '-port', str(self.port)]
    if include_mysql_port:
      args.extend(['-mysql_port', str(self.mysql_port)])
    if parent:
      args.append('-parent')
    if dbname:
      args.extend(['-db_name_override', dbname])
    if keyspace:
      args.extend(['-keyspace', keyspace])
    if shard:
      args.extend(['-shard', shard])
    args.extend([self.tablet_alias, tablet_type])
    utils.run_vtctl(args)
    if start:
      if not wait_for_start:
        expected_state = None
      elif (tablet_type == 'master' or tablet_type == 'replica' or
            tablet_type == 'rdonly' or tablet_type == 'batch'):
        expected_state = 'SERVING'
      else:
        expected_state = 'NOT_SERVING'
      self.start_vttablet(wait_for_state=expected_state, **kwargs)
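As an illustration of the vtctl command this helper builds, a hypothetical call init_tablet('replica', 'test_keyspace', '0') on a tablet listening on port 15101, with MySQL on port 3306 and alias test_nj-0000062345, would run roughly the following (all values here are made up for the example):

utils.run_vtctl(['InitTablet',
                 '-hostname', 'localhost',
                 '-port', '15101',
                 '-mysql_port', '3306',
                 '-parent',
                 '-keyspace', 'test_keyspace',
                 '-shard', '0',
                 'test_nj-0000062345', 'replica'])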
Example #24
  def test_vtgate_qps(self):
    # create the topology
    utils.run_vtctl('CreateKeyspace test_keyspace')
    t = tablet.Tablet(tablet_uid=1, cell="nj")
    t.init_tablet("master", "test_keyspace", "0")
    t.update_addrs()
    utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True)

    # start vtgate and the qps-er
    vtgate_proc, vtgate_port = utils.vtgate_start()
    qpser = utils.run_bg(environment.binary_path('zkclient2')+' -server localhost:%u -mode qps2 test_nj test_keyspace' % vtgate_port)
    time.sleep(10)
    utils.kill_sub_process(qpser)

    # get the vtgate vars, make sure we have what we need
    v = utils.get_vars(vtgate_port)

    # some checks on performance / stats
    # a typical workstation will do 38-40k QPS, check we have more than 10k
    rpcCalls = v['TopoReaderRpcQueryCount']['test_nj']
    if rpcCalls < 100000:
      self.fail('QPS is too low: %u < 10000' % (rpcCalls / 10))
    else:
      logging.debug("Recorded qps: %u", rpcCalls / 10)
    utils.vtgate_kill(vtgate_proc)
Example #25
  def _check_query_service(self, tablet, serving, tablet_control_disabled):
    """_check_query_service will check that the query service is enabled
    or disabled on the tablet. It will also check if the tablet control
    status is the reason for being enabled / disabled.

    It will also run a remote RunHealthCheck to be sure it doesn't change
    the serving state.
    """
    tablet_vars = utils.get_vars(tablet.port)
    if serving:
      expected_state = 'SERVING'
    else:
      expected_state = 'NOT_SERVING'
    self.assertEqual(tablet_vars['TabletStateName'], expected_state, 'tablet %s is not in the right serving state: got %s expected %s' % (tablet.tablet_alias, tablet_vars['TabletStateName'], expected_state))

    status = tablet.get_status()
    if tablet_control_disabled:
      self.assertIn("Query Service disabled by TabletControl", status)
    else:
      self.assertNotIn("Query Service disabled by TabletControl", status)

    if tablet.tablet_type == 'rdonly':
      utils.run_vtctl(['RunHealthCheck', tablet.tablet_alias, 'rdonly'],
                      auto_log=True)

      tablet_vars = utils.get_vars(tablet.port)
      if serving:
        expected_state = 'SERVING'
      else:
        expected_state = 'NOT_SERVING'
      self.assertEqual(tablet_vars['TabletStateName'], expected_state, 'tablet %s is not in the right serving state after health check: got %s expected %s' % (tablet.tablet_alias, tablet_vars['TabletStateName'], expected_state))
Example #26
  def setUpClass(klass):
    utils.run_vtctl('CreateKeyspace test_keyspace')

    shard_0_master.init_tablet( 'master',  'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_spare.init_tablet('spare', 'test_keyspace', '-80')
    shard_1_master.init_tablet( 'master',  'test_keyspace', '80-')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
    idle.init_tablet('idle')
    scrap.init_tablet('idle')

    utils.run_vtctl('RebuildShardGraph /zk/global/vt/keyspaces/test_keyspace/shards/*', auto_log=True)
    utils.run_vtctl('RebuildKeyspaceGraph /zk/global/vt/keyspaces/*', auto_log=True)

    for t in assigned:
      t.create_db('vt_test_keyspace')
      t.start_vttablet()

    for t in scrap, idle, shard_0_spare:
      t.start_vttablet(wait_for_state='NOT_SERVING')

    scrap.scrap()

    utils.run_vtctl('ReparentShard -force test_keyspace/-80 ' + shard_0_master.tablet_alias, auto_log=True)
    utils.run_vtctl('ReparentShard -force test_keyspace/80- ' + shard_1_master.tablet_alias, auto_log=True)


    # run checks now before we start the tablets
    utils.validate_topology()
Example #27
  def test_service_disabled(self):
    # perform some inserts, then change state to stop the invalidator
    self.perform_insert(500)
    inv_before = self.replica_stats()['Totals']['Invalidations']
    invStats_before = self.replica_vars()
    utils.run_vtctl(['ChangeSlaveType', replica_tablet.tablet_alias, 'spare'])

    # wait until it's stopped
    for timeout in xrange(300):
      time.sleep(0.1)
      invStats_after = self.replica_vars()
      logging.debug("Got state %s" %
                    invStats_after["RowcacheInvalidationState"])
      if invStats_after["RowcacheInvalidationState"] == "Disabled":
        break

    # check all data is right
    inv_after = self.replica_stats()['Totals']['Invalidations']
    invStats_after = self.replica_vars()
    logging.debug("Tablet Replica->Spare\n\tBefore: Invalidations: %d InvalidatorStats %s\n\tAfter: Invalidations: %d InvalidatorStats %s" % (inv_before, invStats_before['RowcacheInvalidationCheckPoint'], inv_after, invStats_after['RowcacheInvalidationCheckPoint']))
    self.assertEqual(inv_after, 0,
                     "Row-cache invalid. should be disabled, no invalidations")
    self.assertEqual(invStats_after["RowcacheInvalidationState"], "Disabled",
                     "Row-cache invalidator should be disabled")

    # and restore the type
    utils.run_vtctl(['ChangeSlaveType', replica_tablet.tablet_alias, 'replica'])
Example #28
    def setUpClass(klass):
        utils.run_vtctl("CreateKeyspace test_keyspace")

        shard_0_master.init_tablet("master", "test_keyspace", "-80")
        shard_0_replica.init_tablet("replica", "test_keyspace", "-80")
        shard_0_spare.init_tablet("spare", "test_keyspace", "-80")
        shard_1_master.init_tablet("master", "test_keyspace", "80-")
        shard_1_replica.init_tablet("replica", "test_keyspace", "80-")
        idle.init_tablet("idle")
        scrap.init_tablet("idle")

        utils.run_vtctl("RebuildShardGraph /zk/global/vt/keyspaces/test_keyspace/shards/*", auto_log=True)
        utils.run_vtctl("RebuildKeyspaceGraph /zk/global/vt/keyspaces/*", auto_log=True)

        for t in assigned:
            t.create_db("vt_test_keyspace")
            t.start_vttablet()

        for t in scrap, idle, shard_0_spare:
            t.start_vttablet(wait_for_state="NOT_SERVING")

        scrap.scrap()

        for t in [shard_0_master, shard_0_replica, shard_0_spare, shard_1_master, shard_1_replica, idle, scrap]:
            t.reset_replication()
        utils.run_vtctl("ReparentShard -force test_keyspace/-80 " + shard_0_master.tablet_alias, auto_log=True)
        utils.run_vtctl("ReparentShard -force test_keyspace/80- " + shard_1_master.tablet_alias, auto_log=True)

        # run checks now before we start the tablets
        utils.validate_topology()
Example #29
  def init_tablet(self, tablet_type, keyspace=None, shard=None, force=True, start=False, dbname=None, parent=True, wait_for_start=True, **kwargs):
    self.keyspace = keyspace
    self.shard = shard

    if dbname is None:
      self.dbname = "vt_" + (self.keyspace or "database")
    else:
      self.dbname = dbname

    args = ['InitTablet',
            '-hostname', 'localhost',
            '-port', str(self.port),
            '-mysql_port', str(self.mysql_port),
            ]
    if force:
      args.append('-force')
    if parent:
      args.append('-parent')
    if dbname:
      args.extend(['-db-name-override', dbname])
    if keyspace:
      args.extend(['-keyspace', keyspace])
    if shard:
      args.extend(['-shard', shard])
    args.extend([self.tablet_alias, tablet_type])
    utils.run_vtctl(args)
    if start:
      if not wait_for_start:
        expected_state = None
      elif tablet_type == 'master' or tablet_type == 'replica' or tablet_type == 'rdonly' or tablet_type == 'batch':
        expected_state = "SERVING"
      else:
        expected_state = "NOT_SERVING"
      self.start_vttablet(wait_for_state=expected_state, **kwargs)
Example #30
    def test_topo_read_threshold(self):
        before_topo_rtt = get_topo_rtt()
        # Check original state.
        keyspace_obj = topology.get_keyspace("test_keyspace")
        self.assertNotEqual(keyspace_obj, None, "test_keyspace should not be None")
        self.assertEqual(
            keyspace_obj.sharding_col_type,
            keyrange_constants.KIT_UINT64,
            "ShardingColumnType be %s" % keyrange_constants.KIT_UINT64,
        )

        # Change the keyspace object.
        utils.run_vtctl(
            ["SetKeyspaceShardingInfo", "-force", "test_keyspace", "keyspace_id", keyrange_constants.KIT_BYTES]
        )
        utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)

        # sleep throttle interval and check values again.
        # the keyspace should have changed and also caused a rtt to topo server.
        time.sleep(self.keyspace_fetch_throttle)
        topology.refresh_keyspace(self.vtgate_client, "test_keyspace")
        keyspace_obj = topology.get_keyspace("test_keyspace")
        after_1st_clear = get_topo_rtt()
        self.assertEqual(after_1st_clear - before_topo_rtt, 1, "One additional round-trip to topo server")
        self.assertEqual(
            keyspace_obj.sharding_col_type,
            keyrange_constants.KIT_BYTES,
            "ShardingColumnType be %s" % keyrange_constants.KIT_BYTES,
        )

        # Refresh without sleeping for throttle time shouldn't cause additional rtt.
        topology.refresh_keyspace(self.vtgate_client, "test_keyspace")
        keyspace_obj = topology.get_keyspace("test_keyspace")
        after_2nd_clear = get_topo_rtt()
        self.assertEqual(after_2nd_clear - after_1st_clear, 0, "No additional round-trips to topo server")
Example #31
    def test_delete_keyspace(self):
        utils.run_vtctl(['CreateKeyspace', 'test_delete_keyspace'])
        utils.run_vtctl(['CreateShard', 'test_delete_keyspace/0'])
        utils.run_vtctl([
            'InitTablet', '-keyspace=test_delete_keyspace', '-shard=0',
            'test_nj-0000000100', 'master'
        ])

        # Can't delete keyspace if there are shards present.
        utils.run_vtctl(['DeleteKeyspace', 'test_delete_keyspace'],
                        expect_fail=True)
        # Can't delete shard if there are tablets present.
        utils.run_vtctl(
            ['DeleteShard', '-even_if_serving', 'test_delete_keyspace/0'],
            expect_fail=True)

        # Use recursive DeleteShard to remove tablets.
        utils.run_vtctl([
            'DeleteShard', '-even_if_serving', '-recursive',
            'test_delete_keyspace/0'
        ])
        # Now non-recursive DeleteKeyspace should work.
        utils.run_vtctl(['DeleteKeyspace', 'test_delete_keyspace'])

        # Start over and this time use recursive DeleteKeyspace to do everything.
        utils.run_vtctl(['CreateKeyspace', 'test_delete_keyspace'])
        utils.run_vtctl(['CreateShard', 'test_delete_keyspace/0'])
        utils.run_vtctl([
            'InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
            '-shard=0', 'test_nj-0000000100', 'master'
        ])

        # Create the serving/replication entries and check that they exist,
        # so we can later check they're deleted.
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'])
        utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])

        # Recursive DeleteKeyspace
        utils.run_vtctl(
            ['DeleteKeyspace', '-recursive', 'test_delete_keyspace'])

        # Check that everything is gone.
        utils.run_vtctl(['GetKeyspace', 'test_delete_keyspace'],
                        expect_fail=True)
        utils.run_vtctl(['GetShard', 'test_delete_keyspace/0'],
                        expect_fail=True)
        utils.run_vtctl(['GetTablet', 'test_nj-0000000100'], expect_fail=True)
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'],
            expect_fail=True)
        utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'],
                        expect_fail=True)
Example #32
    def test_health_check_drained_state_does_not_shutdown_query_service(self):
        # This test is similar to test_health_check, but has the following
        # differences:
        # - the second tablet is an 'rdonly' and not a 'replica'
        # - the second tablet will be set to 'drained' and we expect that
        #   the query service won't be shutdown

        # Setup master and rdonly tablets.
        tablet_62344.init_tablet('replica', 'test_keyspace', '0')

        for t in tablet_62344, tablet_62044:
            t.create_db('vt_test_keyspace')

        # Note we only have a master and a rdonly. So we can't enable
        # semi-sync in this case, as the rdonly slaves don't semi-sync ack.
        tablet_62344.start_vttablet(wait_for_state=None,
                                    enable_semi_sync=False)
        tablet_62044.start_vttablet(wait_for_state=None,
                                    init_tablet_type='rdonly',
                                    init_keyspace='test_keyspace',
                                    init_shard='0',
                                    enable_semi_sync=False)

        tablet_62344.wait_for_vttablet_state('NOT_SERVING')
        tablet_62044.wait_for_vttablet_state('NOT_SERVING')
        self.check_healthz(tablet_62044, False)

        # Enable replication.
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/0',
            tablet_62344.tablet_alias
        ])

        # Trigger healthcheck to save time waiting for the next interval.
        utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
        tablet_62044.wait_for_vttablet_state('SERVING')
        self.check_healthz(tablet_62044, True)

        # Change from rdonly to drained and stop replication. (These
        # actions are similar to the SplitClone vtworker command
        # implementation.)  The tablet will stay healthy, and the
        # query service is still running.
        utils.run_vtctl(
            ['ChangeSlaveType', tablet_62044.tablet_alias, 'drained'])
        # Trying to drain the same tablet again should raise an error.
        try:
            utils.run_vtctl(
                ['ChangeSlaveType', tablet_62044.tablet_alias, 'drained'])
        except Exception as e:
            s = str(e)
            self.assertIn("already drained", s)
        utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
        # Trigger healthcheck explicitly to avoid waiting for the next interval.
        utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
        utils.wait_for_tablet_type(tablet_62044.tablet_alias, 'drained')
        self.check_healthz(tablet_62044, True)
        # Query service is still running.
        tablet_62044.wait_for_vttablet_state('SERVING')

        # Restart replication. Tablet will become healthy again.
        utils.run_vtctl(
            ['ChangeSlaveType', tablet_62044.tablet_alias, 'rdonly'])
        utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
        utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
        self.check_healthz(tablet_62044, True)

        # kill the tablets
        tablet.kill_tablets([tablet_62344, tablet_62044])
Example #33
    def test_health_check(self):
        # one master, one replica that starts not initialized
        # (for the replica, we let vttablet do the InitTablet)
        tablet_62344.init_tablet('replica', 'test_keyspace', '0')

        for t in tablet_62344, tablet_62044:
            t.create_db('vt_test_keyspace')

        tablet_62344.start_vttablet(wait_for_state=None)
        tablet_62044.start_vttablet(wait_for_state=None,
                                    lameduck_period='5s',
                                    init_tablet_type='replica',
                                    init_keyspace='test_keyspace',
                                    init_shard='0')

        tablet_62344.wait_for_vttablet_state('NOT_SERVING')
        tablet_62044.wait_for_vttablet_state('NOT_SERVING')
        self.check_healthz(tablet_62044, False)

        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/0',
            tablet_62344.tablet_alias
        ])

        # make sure the unhealthy slave goes to healthy
        tablet_62044.wait_for_vttablet_state('SERVING')
        utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
        self.check_healthz(tablet_62044, True)

        # make sure the master is still master
        ti = utils.run_vtctl_json(['GetTablet', tablet_62344.tablet_alias])
        self.assertEqual(ti['type'], topodata_pb2.MASTER,
                         'unexpected master type: %s' % ti['type'])

        # stop replication at the mysql level.
        tablet_62044.mquery('', 'stop slave')
        # vttablet replication_reporter should restart it.
        utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
        # insert something on the master and wait for it on the slave.
        tablet_62344.mquery('vt_test_keyspace', [
            'create table repl_test_table (id int)',
            'insert into repl_test_table values (123)'
        ],
                            write=True)
        timeout = 10.0
        while True:
            try:
                result = tablet_62044.mquery('vt_test_keyspace',
                                             'select * from repl_test_table')
                if result:
                    self.assertEqual(result[0][0], 123L)
                    break
            except MySQLdb.ProgrammingError:
                # Maybe the create table hasn't gone through yet, so we wait longer.
                logging.exception(
                    'got this exception waiting for data, ignoring it')
            timeout = utils.wait_step(
                'slave replication repaired by replication_reporter', timeout)

        # stop replication, make sure we don't go unhealthy.
        # (we have a baseline as well, so the time should be good).
        utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
        utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
        self.check_healthz(tablet_62044, True)

        # make sure status web page is healthy
        self.assertRegexpMatches(tablet_62044.get_status(), healthy_expr)

        # make sure the health stream is updated
        health = utils.run_vtctl_json(
            ['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias])
        self.assertTrue(
            ('seconds_behind_master' not in health['realtime_stats'])
            or (health['realtime_stats']['seconds_behind_master'] < 30),
            'got unexpected health: %s' % str(health))
        self.assertIn('serving', health)

        # then restart replication, make sure we stay healthy
        utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
        utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])

        # make sure status web page is healthy
        self.assertRegexpMatches(tablet_62044.get_status(), healthy_expr)

        # now test VtTabletStreamHealth returns the right thing
        stdout, _ = utils.run_vtctl(
            ['VtTabletStreamHealth', '-count', '2', tablet_62044.tablet_alias],
            trap_output=True,
            auto_log=True)
        lines = stdout.splitlines()
        self.assertEqual(len(lines), 2)
        for line in lines:
            logging.debug('Got health: %s', line)
            data = json.loads(line)
            self.assertIn('realtime_stats', data)
            self.assertIn('serving', data)
            self.assertTrue(data['serving'])
            self.assertNotIn('health_error', data['realtime_stats'])
            self.assertNotIn('tablet_externally_reparented_timestamp', data)
            self.assertEqual('test_keyspace', data['target']['keyspace'])
            self.assertEqual('0', data['target']['shard'])
            self.assertEqual(topodata_pb2.REPLICA,
                             data['target']['tablet_type'])

        # Test that VtTabletStreamHealth reports a QPS >0.0.
        # Therefore, issue several reads first.
        # NOTE: This may be potentially flaky because we'll observe a QPS >0.0
        #       exactly "once" for the duration of one sampling interval (5s) and
        #       after that we'll see 0.0 QPS rates again. If this becomes actually
        #       flaky, we need to read continuously in a separate thread.
        for _ in range(10):
            tablet_62044.execute('select 1 from dual')
        # This may take up to 5 seconds to become true because we sample the query
        # counts for the rates only every 5 seconds (see query_service_stats.go).
        timeout = 10
        while True:
            health = utils.run_vtctl_json([
                'VtTabletStreamHealth', '-count', '1',
                tablet_62044.tablet_alias
            ])
            if health['realtime_stats'].get('qps', 0.0) > 0.0:
                break
            timeout = utils.wait_step('QPS >0.0 seen', timeout)

        # kill the tablets
        tablet.kill_tablets([tablet_62344, tablet_62044])
Example #34
    def _test_sanity(self):
        # Start up a master mysql and vttablet
        utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
        utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
        tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
        utils.validate_topology()

        # If these statements don't run before the tablet starts, it will wedge
        # waiting for the db to become accessible. This is more a bug than
        # a feature.
        tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
                              self._populate_vt_select_test)

        tablet_62344.start_vttablet()

        # make sure the query service is started right away.
        qr = tablet_62344.execute('select id, msg from vt_select_test')
        self.assertEqual(len(qr['rows']), 4,
                         'expected 4 rows in vt_select_test: %s' % str(qr))
        self.assertEqual(qr['fields'][0]['name'], 'id')
        self.assertEqual(qr['fields'][1]['name'], 'msg')

        # Test that excluding field names via execute_options works as expected.
        qr = tablet_62344.execute('select id, msg from vt_select_test',
                                  execute_options='included_fields:TYPE_ONLY ')
        self.assertEqual(len(qr['rows']), 4,
                         'expected 4 rows in vt_select_test: %s' % str(qr))
        self.assertNotIn('name', qr['fields'][0])
        self.assertNotIn('name', qr['fields'][1])

        # make sure direct dba queries work
        query_result = utils.run_vtctl_json([
            'ExecuteFetchAsDba', '-json', tablet_62344.tablet_alias,
            'select * from vt_test_keyspace.vt_select_test'
        ])
        self.assertEqual(
            len(query_result['rows']), 4,
            'expected 4 rows in vt_select_test: %s' % str(query_result))
        self.assertEqual(
            len(query_result['fields']), 2,
            'expected 2 fields in vt_select_test: %s' % str(query_result))

        # check Ping / RefreshState / RefreshStateByShard
        utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
        utils.run_vtctl(['RefreshState', tablet_62344.tablet_alias])
        utils.run_vtctl(['RefreshStateByShard', 'test_keyspace/0'])
        utils.run_vtctl(
            ['RefreshStateByShard', '--cells=test_nj', 'test_keyspace/0'])

        # Quickly check basic actions.
        utils.run_vtctl(['SetReadOnly', tablet_62344.tablet_alias])
        utils.wait_db_read_only(62344)

        utils.run_vtctl(['SetReadWrite', tablet_62344.tablet_alias])
        utils.check_db_read_write(62344)

        utils.validate_topology()
        utils.run_vtctl(['ValidateKeyspace', 'test_keyspace'])
        # not pinging tablets, as it enables replication checks, and they
        # break because we only have a single master, no slaves
        utils.run_vtctl(
            ['ValidateShard', '-ping-tablets=false', 'test_keyspace/0'])

        tablet_62344.kill_vttablet()
Example #35
    def test_sharding(self):

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # run checks now before we start the tablets
        utils.validate_topology()

        # create databases, start the tablets, wait for them to start
        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_replica
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_replica
        ]:
            t.wait_for_vttablet_state('SERVING')

        # apply the schema on the first shard through vtctl, so all tablets
        # are the same (replication is not enabled yet, so -stop-replication
        # is just there to be tested)
        utils.run_vtctl([
            'ApplySchema', '-stop-replication',
            '-sql=' + create_vt_select_test.replace("\n", ""),
            shard_0_master.tablet_alias
        ])
        utils.run_vtctl([
            'ApplySchema', '-stop-replication',
            '-sql=' + create_vt_select_test.replace("\n", ""),
            shard_0_replica.tablet_alias
        ])

        if environment.topo_server_implementation == 'zookeeper':
            # start zkocc, we'll use it later, indirectly with the vtdb-zkocc driver
            zkocc_server = utils.zkocc_start()

        # start vtgate, we'll use it later
        vtgate_server, vtgate_port = utils.vtgate_start()

        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_replica
        ]:
            t.reset_replication()
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # apply the schema on the second shard using a simple schema upgrade
        utils.run_vtctl([
            'ApplySchemaShard', '-simple',
            '-sql=' + create_vt_select_test_reverse.replace("\n", ""),
            'test_keyspace/80-'
        ])

        # insert some values directly (db is RO after minority reparent)
        # FIXME(alainjobart) these values don't match the shard map
        utils.run_vtctl(['SetReadWrite', shard_0_master.tablet_alias])
        utils.run_vtctl(['SetReadWrite', shard_1_master.tablet_alias])
        shard_0_master.mquery(
            'vt_test_keyspace',
            "insert into vt_select_test (id, msg) values (1, 'test 1')",
            write=True)
        shard_1_master.mquery(
            'vt_test_keyspace',
            "insert into vt_select_test (id, msg) values (10, 'test 10')",
            write=True)

        utils.validate_topology(ping_tablets=True)

        utils.pause("Before the sql scatter query")

        # note the order of the rows is not guaranteed, as the go routines
        # doing the work can go out of order
        self._check_rows(["Index\tid\tmsg", "1\ttest 1", "10\ttest 10"])

        # write a value, re-read them all
        utils.vtclient2(
            3803,
            "/test_nj/test_keyspace/master",
            "insert into vt_select_test (id, msg) values (:keyspace_id, 'test 2')",
            bindvars='{"keyspace_id": 2}',
            driver="vtdb",
            verbose=True)
        self._check_rows(
            ["Index\tid\tmsg", "1\ttest 1", "2\ttest 2", "10\ttest 10"])

        # make sure the '2' value was written on first shard
        rows = shard_0_master.mquery(
            'vt_test_keyspace',
            "select id, msg from vt_select_test order by id")
        self.assertEqual(rows, (
            (1, 'test 1'),
            (2, 'test 2'),
        ), 'wrong mysql_query output: %s' % str(rows))

        utils.pause("After db writes")

        # now run the same query through the various topo servers, with and
        # without streaming
        self._check_rows(
            ["Index\tid\tmsg", "1\ttest 1", "2\ttest 2", "10\ttest 10"],
            driver="vtdb-streaming")
        if environment.topo_server_implementation == 'zookeeper':
            self._check_rows(
                ["Index\tid\tmsg", "1\ttest 1", "2\ttest 2", "10\ttest 10"],
                driver="vtdb-zk")
            self._check_rows(
                ["Index\tid\tmsg", "1\ttest 1", "2\ttest 2", "10\ttest 10"],
                driver="vtdb-zk-streaming")
            self._check_rows(
                ["Index\tid\tmsg", "1\ttest 1", "2\ttest 2", "10\ttest 10"],
                driver="vtdb-zkocc")
            self._check_rows(
                ["Index\tid\tmsg", "1\ttest 1", "2\ttest 2", "10\ttest 10"],
                driver="vtdb-zkocc-streaming")

        # make sure the schema checking works
        self._check_rows_schema_diff("vtdb")
        if environment.topo_server_implementation == 'zookeeper':
            self._check_rows_schema_diff("vtdb-zk")
            self._check_rows_schema_diff("vtdb-zkocc")

        # throw in a schema validation step; the two shards were created with
        # different schemas, so the keyspace-wide check should report a diff
        utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/-80'])
        utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/80-'])
        out, err = utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                                   trap_output=True,
                                   raise_on_error=False)
        if 'test_nj-0000062344 and test_nj-0000062346 disagree on schema for table vt_select_test:\nCREATE TABLE' not in err or \
           'test_nj-0000062344 and test_nj-0000062347 disagree on schema for table vt_select_test:\nCREATE TABLE' not in err:
            self.fail('wrong ValidateSchemaKeyspace output: ' + err)

        # validate versions
        utils.run_vtctl(['ValidateVersionShard', 'test_keyspace/-80'],
                        auto_log=True)
        utils.run_vtctl(['ValidateVersionKeyspace', 'test_keyspace'],
                        auto_log=True)

        # show and validate permissions
        utils.run_vtctl(['GetPermissions', 'test_nj-0000062344'],
                        auto_log=True)
        utils.run_vtctl(['ValidatePermissionsShard', 'test_keyspace/-80'],
                        auto_log=True)
        utils.run_vtctl(['ValidatePermissionsKeyspace', 'test_keyspace'],
                        auto_log=True)

        if environment.topo_server_implementation == 'zookeeper':
            # and create zkns on this complex keyspace, make sure a few files are created
            utils.run_vtctl(['ExportZknsForKeyspace', 'test_keyspace'])
            out, err = utils.run(environment.binary_argstr('zk') +
                                 ' ls -R /zk/test_nj/zk?s/vt/test_keysp*',
                                 trap_output=True)
            lines = out.splitlines()
            for base in ['-80', '80-']:
                for db_type in ['master', 'replica']:
                    for sub_path in ['', '.vdns', '/0', '/_vtocc.vdns']:
                        expected = '/zk/test_nj/zkns/vt/test_keyspace/' + base + '/' + db_type + sub_path
                        if expected not in lines:
                            self.fail('missing zkns part:\n%s\nin:%s' %
                                      (expected, out))

        # now try to connect using the python client and shard-aware connection
        # to both shards
        # first get the topology and check it
        vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                              "test_nj", 30.0)
        topology.read_keyspaces(vtgate_client)

        shard_0_master_addrs = topology.get_host_port_by_name(
            vtgate_client, "test_keyspace.-80.master:_vtocc")
        if len(shard_0_master_addrs) != 1:
            self.fail(
                'topology.get_host_port_by_name failed for "test_keyspace.-80.master:_vtocc", got: %s'
                % " ".join([
                    "%s:%u(%s)" % (h, p, str(e))
                    for (h, p, e) in shard_0_master_addrs
                ]))
        logging.debug(
            "shard 0 master addrs: %s", " ".join([
                "%s:%u(%s)" % (h, p, str(e))
                for (h, p, e) in shard_0_master_addrs
            ]))

        # connect to shard -80
        conn = tablet3.TabletConnection(
            "%s:%u" % (shard_0_master_addrs[0][0], shard_0_master_addrs[0][1]),
            "", "test_keyspace", "-80", 10.0)
        conn.dial()
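        # fetch the rows directly from the -80 master and check only the
        # values routed to this shard are present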
        (results, rowcount, lastrowid, fields) = conn._execute(
            "select id, msg from vt_select_test order by id", {})
        self.assertEqual(results, [
            (1, 'test 1'),
            (2, 'test 2'),
        ], 'wrong conn._execute output: %s' % str(results))

        # connect to shard 80-
        shard_1_master_addrs = topology.get_host_port_by_name(
            vtgate_client, "test_keyspace.80-.master:_vtocc")
        conn = tablet3.TabletConnection(
            "%s:%u" % (shard_1_master_addrs[0][0], shard_1_master_addrs[0][1]),
            "", "test_keyspace", "80-", 10.0)
        conn.dial()
        (results, rowcount, lastrowid, fields) = conn._execute(
            "select id, msg from vt_select_test order by id", {})
        self.assertEqual(results, [
            (10, 'test 10'),
        ], 'wrong conn._execute output: %s' % str(results))
        vtgate_client.close()

        # try to connect with bad shard
        try:
            conn = tablet3.TabletConnection(
                "localhost:%u" % shard_0_master.port, "", "test_keyspace",
                "-90", 10.0)
            conn.dial()
            self.fail('expected an exception')
        except Exception as e:
            if "fatal: Shard mismatch, expecting -80, received -90" not in str(
                    e):
                self.fail('unexpected exception: ' + str(e))

        utils.vtgate_kill(vtgate_server)
        if environment.topo_server_implementation == 'zookeeper':
            utils.kill_sub_process(zkocc_server)
        tablet.kill_tablets(
            [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica])
Beispiel #36
0
  def test_get_srv_keyspace_names(self):
    stdout, _ = utils.run_vtctl(['GetSrvKeyspaceNames', 'test_nj'],
                                trap_output=True)
    self.assertEqual(set(stdout.splitlines()),
                     {SHARDED_KEYSPACE, UNSHARDED_KEYSPACE})
Beispiel #37
0
    def test_remove_keyspace_cell(self):
        utils.run_vtctl(['CreateKeyspace', 'test_delete_keyspace'])
        utils.run_vtctl(['CreateShard', 'test_delete_keyspace/0'])
        utils.run_vtctl(['CreateShard', 'test_delete_keyspace/1'])
        utils.run_vtctl([
            'InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
            '-shard=0', 'test_ca-0000000100', 'master'
        ])
        utils.run_vtctl([
            'InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
            '-shard=1', 'test_ca-0000000101', 'master'
        ])
        utils.run_vtctl([
            'InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
            '-shard=0', 'test_nj-0000000100', 'replica'
        ])
        utils.run_vtctl([
            'InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
            '-shard=1', 'test_nj-0000000101', 'replica'
        ])

        # Create the serving/replication entries and check that they exist,
        # so we can later check they're deleted.
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'])
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/1'])
        utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])
        utils.run_vtctl(['GetSrvKeyspace', 'test_ca', 'test_delete_keyspace'])

        # Just remove the shard from one cell (including its tablets),
        # leaving the global records and the other cells/shards alone.
        utils.run_vtctl([
            'RemoveShardCell', '-recursive', 'test_delete_keyspace/0',
            'test_nj'
        ])
        # Check that the shard is gone from test_nj.
        srv_keyspace = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])
        for partition in srv_keyspace['partitions']:
            self.assertEqual(
                len(partition['shard_references']), 1,
                'RemoveShardCell should have removed one shard from the target cell: '
                + json.dumps(srv_keyspace))
        # Make sure the shard is still serving in test_ca.
        srv_keyspace = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_ca', 'test_delete_keyspace'])
        for partition in srv_keyspace['partitions']:
            self.assertEqual(
                len(partition['shard_references']), 2,
                'RemoveShardCell should not have changed other cells: ' +
                json.dumps(srv_keyspace))
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])

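        # the global topology objects and the other cell's records must still
        # be there; only the test_nj records for shard 0 should be gone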
        utils.run_vtctl(['GetKeyspace', 'test_delete_keyspace'])
        utils.run_vtctl(['GetShard', 'test_delete_keyspace/0'])
        utils.run_vtctl(['GetTablet', 'test_ca-0000000100'])
        utils.run_vtctl(['GetTablet', 'test_nj-0000000100'], expect_fail=True)
        utils.run_vtctl(['GetTablet', 'test_nj-0000000101'])
        utils.run_vtctl(
            ['GetShardReplication', 'test_ca', 'test_delete_keyspace/0'])
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'],
            expect_fail=True)
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/1'])
        utils.run_vtctl(['GetSrvKeyspace', 'test_nj', 'test_delete_keyspace'])

        # Add it back to do another test.
        utils.run_vtctl([
            'InitTablet', '-port=1234', '-keyspace=test_delete_keyspace',
            '-shard=0', 'test_nj-0000000100', 'replica'
        ])
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'])

        # Now use RemoveKeyspaceCell to remove all shards.
        utils.run_vtctl([
            'RemoveKeyspaceCell', '-recursive', 'test_delete_keyspace',
            'test_nj'
        ])
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_delete_keyspace'])

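        # after RemoveKeyspaceCell, all test_nj shard replication records are
        # gone, while the test_ca ones are untouched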
        utils.run_vtctl(
            ['GetShardReplication', 'test_ca', 'test_delete_keyspace/0'])
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/0'],
            expect_fail=True)
        utils.run_vtctl(
            ['GetShardReplication', 'test_nj', 'test_delete_keyspace/1'],
            expect_fail=True)

        # Clean up.
        utils.run_vtctl(
            ['DeleteKeyspace', '-recursive', 'test_delete_keyspace'])
Beispiel #38
0
  def _delete_swap(self, swap_uuid):
    """Delete the schema swap with the given uuid."""
    utils.run_vtctl(['WorkflowDelete', swap_uuid], auto_log=True)
Beispiel #39
0
    @classmethod
    def setUpClass(cls):
        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'keyspace_id',
            '--sharding_column_type', 'uint64', 'test_keyspace'
        ])
        utils.run_vtctl([
            'CreateKeyspace', '--served_from',
            'master:test_keyspace,replica:test_keyspace,rdonly:test_keyspace',
            'redirected_keyspace'
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('spare', 'test_keyspace', '-80')
        shard_0_spare.init_tablet('spare', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.run_vtctl(['RebuildKeyspaceGraph', 'redirected_keyspace'],
                        auto_log=True)

        # start running all the tablets
        for t in [shard_0_master, shard_1_master, shard_1_replica]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             extra_args=utils.vtctld.process_args())
        shard_0_replica.create_db('vt_test_keyspace')
        shard_0_replica.start_vttablet(extra_args=utils.vtctld.process_args(),
                                       target_tablet_type='replica',
                                       wait_for_state=None)

        shard_0_spare.start_vttablet(wait_for_state=None,
                                     extra_args=utils.vtctld.process_args())

        # wait for the right states
        for t in [shard_0_master, shard_1_master, shard_1_replica]:
            t.wait_for_vttablet_state('SERVING')
        for t in [shard_0_replica, shard_0_spare]:
            t.wait_for_vttablet_state('NOT_SERVING')

        for t in [
                shard_0_master, shard_0_replica, shard_0_spare, shard_1_master,
                shard_1_replica
        ]:
            t.reset_replication()
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias
        ],
                        auto_log=True)
        shard_0_replica.wait_for_vttablet_state('SERVING')

        # run checks now
        utils.validate_topology()
Beispiel #40
0
    def test_sharding(self):

        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'keyspace_id',
            '--sharding_column_type', 'uint64', 'test_keyspace'
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')

        # run checks now before we start the tablets
        utils.validate_topology()

        # create databases, start the tablets, wait for them to start
        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_replica
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None)
        for t in [shard_0_master, shard_1_master]:
            t.wait_for_vttablet_state('SERVING')
        for t in [shard_0_replica, shard_1_replica]:
            t.wait_for_vttablet_state('NOT_SERVING')

        # apply the schema on the first shard directly through mysql, so both
        # tablets have the same schema.
        shard_0_master.mquery('vt_test_keyspace',
                              create_vt_select_test.replace('\n', ''),
                              write=True)
        shard_0_replica.mquery('vt_test_keyspace',
                               create_vt_select_test.replace('\n', ''),
                               write=True)

        # apply the schema on the second shard.
        shard_1_master.mquery('vt_test_keyspace',
                              create_vt_select_test_reverse.replace('\n', ''),
                              write=True)
        shard_1_replica.mquery('vt_test_keyspace',
                               create_vt_select_test_reverse.replace('\n', ''),
                               write=True)

        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_replica
        ]:
            utils.run_vtctl(['ReloadSchema', t.tablet_alias])

        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_replica
        ]:
            t.reset_replication()
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # insert some values directly (db is RO after minority reparent)
        # FIXME(alainjobart) these values don't match the shard map
        utils.run_vtctl(['SetReadWrite', shard_0_master.tablet_alias])
        utils.run_vtctl(['SetReadWrite', shard_1_master.tablet_alias])
        shard_0_master.mquery(
            'vt_test_keyspace',
            "insert into vt_select_test (id, msg) values (1, 'test 1')",
            write=True)
        shard_1_master.mquery(
            'vt_test_keyspace',
            "insert into vt_select_test (id, msg) values (10, 'test 10')",
            write=True)

        utils.validate_topology(ping_tablets=True)

        utils.pause('Before the sql scatter query')

        # make sure the '1' value was written on first shard
        rows = shard_0_master.mquery(
            'vt_test_keyspace',
            'select id, msg from vt_select_test order by id')
        self.assertEqual(rows, ((1, 'test 1'), ),
                         'wrong mysql_query output: %s' % str(rows))

        utils.pause('After db writes')

        # throw in a schema validation step; the two shards were created with
        # different schemas, so the keyspace-wide check should report a diff
        utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/-80'])
        utils.run_vtctl(['ValidateSchemaShard', 'test_keyspace/80-'])
        _, err = utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                                 trap_output=True,
                                 raise_on_error=False)
        if ('test_nj-0000062344 and test_nj-0000062346 disagree on schema '
                'for table vt_select_test:\nCREATE TABLE' not in err or
                'test_nj-0000062344 and test_nj-0000062347 disagree on schema '
                'for table vt_select_test:\nCREATE TABLE' not in err):
            self.fail('wrong ValidateSchemaKeyspace output: ' + err)

        # validate versions
        utils.run_vtctl(['ValidateVersionShard', 'test_keyspace/-80'],
                        auto_log=True)
        utils.run_vtctl(['ValidateVersionKeyspace', 'test_keyspace'],
                        auto_log=True)

        # show and validate permissions
        utils.run_vtctl(['GetPermissions', 'test_nj-0000062344'],
                        auto_log=True)
        utils.run_vtctl(['ValidatePermissionsShard', 'test_keyspace/-80'],
                        auto_log=True)
        utils.run_vtctl(['ValidatePermissionsKeyspace', 'test_keyspace'],
                        auto_log=True)

        # connect to the tablets directly, make sure they know / validate
        # their own shard
        sql = 'select id, msg from vt_select_test order by id'

        qr = shard_0_master.execute(sql)
        self.assertEqual(qr['rows'], [
            [1, 'test 1'],
        ])

        qr = shard_1_master.execute(sql)
        self.assertEqual(qr['rows'], [
            [10, 'test 10'],
        ])

        # make sure that if we use a wrong target, the destination rejects
        # the query.
        _, stderr = utils.run_vtctl([
            'VtTabletExecute', '-json', '-keyspace', 'test_keyspace', '-shard',
            '-90', '-tablet_type', 'master', shard_0_master.tablet_alias, sql
        ],
                                    expect_fail=True)
        self.assertIn('retry: Invalid shard -90', stderr)

        tablet.kill_tablets(
            [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica])
Beispiel #41
0
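        # the update stream service on the replica should be enabled and
        # should have seen both DML and POS events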
        v = utils.get_vars(replica_tablet.port)
        if v['UpdateStreamState'] != 'Enabled':
            self.fail("Update stream service should be 'Enabled' but is '%s'" %
                      v['UpdateStreamState'])
        self.assertTrue('DML' in v['UpdateStreamEvents'])
        self.assertTrue('POS' in v['UpdateStreamEvents'])

        logging.debug('Testing enable -> disable switch starting @ %s',
                      start_position)
        replica_conn = self._get_replica_stream_conn()
        replica_conn.dial()
        disabled_err = False
        txn_count = 0
        try:
            data = replica_conn.stream_start(start_position)
            utils.run_vtctl(
                ['ChangeSlaveType', replica_tablet.tablet_alias, 'spare'])
            utils.wait_for_tablet_type(replica_tablet.tablet_alias, 'spare',
                                       30)
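            # keep reading from the stream; it should be cut off once the
            # update stream service is disabled on the spare tablet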
            while data:
                data = replica_conn.stream_next()
                if data is not None and data['Category'] == 'POS':
                    txn_count += 1
            logging.debug('Test Service Switch: FAIL')
            return
        except dbexceptions.DatabaseError, e:
            self.assertEqual(
                'Fatal Service Error: Disconnecting because the Update Stream '
                'service has been disabled', str(e))
        except Exception, e:
            logging.error('Exception: %s', str(e))
            logging.error('Traceback: %s', traceback.format_exc())
Beispiel #42
0
  def _stop_swap(self, swap_uuid):
    """Stop the running schema swap with the given uuid."""
    utils.run_vtctl(['WorkflowStop', swap_uuid], auto_log=True)
Beispiel #43
0
def setUpModule():
  try:
    environment.topo_server().setup()

    logging.debug("Creating certificates")
    os.makedirs(cert_dir)

    # Create CA certificate
    ca_key = cert_dir + "/ca-key.pem"
    ca_cert = cert_dir + "/ca-cert.pem"
    openssl(["genrsa", "-out", cert_dir + "/ca-key.pem"])
    ca_config = cert_dir + "/ca.config"
    with open(ca_config, 'w') as fd:
      fd.write("""
[ req ]
 default_bits           = 1024
 default_keyfile        = keyfile.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
 prompt                 = no
 output_password        = mypass
[ req_distinguished_name ]
 C                      = US
 ST                     = California
 L                      = Mountain View
 O                      = Google
 OU                     = Vitess
 CN                     = Mysql CA
 emailAddress           = [email protected]
[ req_attributes ]
 challengePassword      = A challenge password
""")
    openssl(["req", "-new", "-x509", "-nodes", "-days", "3600", "-batch",
             "-config", ca_config,
             "-key", ca_key,
             "-out", ca_cert])

    # Create mysql server certificate, remove passphrase, and sign it
    server_key = cert_dir + "/server-key.pem"
    server_cert = cert_dir + "/server-cert.pem"
    server_req = cert_dir + "/server-req.pem"
    server_config = cert_dir + "/server.config"
    with open(server_config, 'w') as fd:
      fd.write("""
[ req ]
 default_bits           = 1024
 default_keyfile        = keyfile.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
 prompt                 = no
 output_password        = mypass
[ req_distinguished_name ]
 C                      = US
 ST                     = California
 L                      = Mountain View
 O                      = Google
 OU                     = Vitess
 CN                     = Mysql Server
 emailAddress           = [email protected]
[ req_attributes ]
 challengePassword      = A challenge password
""")
    openssl(["req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch",
             "-config", server_config,
             "-keyout", server_key, "-out", server_req])
    openssl(["rsa", "-in", server_key, "-out", server_key])
    openssl(["x509", "-req",
             "-in", server_req,
             "-days", "3600",
             "-CA", ca_cert,
             "-CAkey", ca_key,
             "-set_serial", "01",
             "-out", server_cert])

    # Create mysql client certificate, remove passphrase, and sign it
    client_key = cert_dir + "/client-key.pem"
    client_cert = cert_dir + "/client-cert.pem"
    client_req = cert_dir + "/client-req.pem"
    client_config = cert_dir + "/client.config"
    with open(client_config, 'w') as fd:
      fd.write("""
[ req ]
 default_bits           = 1024
 default_keyfile        = keyfile.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
 prompt                 = no
 output_password        = mypass
[ req_distinguished_name ]
 C                      = US
 ST                     = California
 L                      = Mountain View
 O                      = Google
 OU                     = Vitess
 CN                     = Mysql Client
 emailAddress           = [email protected]
[ req_attributes ]
 challengePassword      = A challenge password
""")
    openssl(["req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch",
             "-config", client_config,
             "-keyout", client_key, "-out", client_req])
    openssl(["rsa", "-in", client_key, "-out", client_key])
    openssl(["x509", "-req",
             "-in", client_req,
             "-days", "3600",
             "-CA", ca_cert,
             "-CAkey", ca_key,
             "-set_serial", "02",
             "-out", client_cert])

    # Create vt server certificate, remove passphrase, and sign it
    vt_server_key = cert_dir + "/vt-server-key.pem"
    vt_server_cert = cert_dir + "/vt-server-cert.pem"
    vt_server_req = cert_dir + "/vt-server-req.pem"
    vt_server_config = cert_dir + "/server.config"
    with open(vt_server_config, 'w') as fd:
      fd.write("""
[ req ]
 default_bits           = 1024
 default_keyfile        = keyfile.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
 prompt                 = no
 output_password        = mypass
[ req_distinguished_name ]
 C                      = US
 ST                     = California
 L                      = Mountain View
 O                      = Google
 OU                     = Vitess
 CN                     = Vitess Server
 emailAddress           = [email protected]
[ req_attributes ]
 challengePassword      = A challenge password
""")
    openssl(["req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch",
             "-config", vt_server_config,
             "-keyout", vt_server_key, "-out", vt_server_req])
    openssl(["rsa", "-in", vt_server_key, "-out", vt_server_key])
    openssl(["x509", "-req",
             "-in", vt_server_req,
             "-days", "3600",
             "-CA", ca_cert,
             "-CAkey", ca_key,
             "-set_serial", "03",
             "-out", vt_server_cert])

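    # write an extra my.cnf include so mysqld picks up the CA cert and the
    # server cert/key generated above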
    extra_my_cnf = cert_dir + "/secure.cnf"
    fd = open(extra_my_cnf, "w")
    fd.write("ssl-ca=" + ca_cert + "\n")
    fd.write("ssl-cert=" + server_cert + "\n")
    fd.write("ssl-key=" + server_key + "\n")
    fd.close()

    setup_procs = [
        shard_0_master.init_mysql(extra_my_cnf=extra_my_cnf),
        shard_0_slave.init_mysql(extra_my_cnf=extra_my_cnf),
        ]
    utils.wait_procs(setup_procs)

    utils.run_vtctl('CreateKeyspace test_keyspace')

    shard_0_master.init_tablet('master',  'test_keyspace', '0')
    shard_0_slave.init_tablet('replica',  'test_keyspace', '0')

    utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True)

    # create databases so vttablet can start behaving normally
    shard_0_master.create_db('vt_test_keyspace')
    shard_0_slave.create_db('vt_test_keyspace')
  except:
    tearDownModule()
    raise
Beispiel #44
0
def setUpModule():
    global vtgate_server
    global vtgate_port
    global vtgate_socket_file
    global master_start_position

    try:
        environment.topo_server().setup()

        # start mysql instance external to the test
        setup_procs = [master_tablet.init_mysql(), replica_tablet.init_mysql()]
        utils.wait_procs(setup_procs)

        # Start up a master mysql and vttablet
        logging.debug('Setting up tablets')
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
        master_tablet.init_tablet('master', 'test_keyspace', '0')
        replica_tablet.init_tablet('replica', 'test_keyspace', '0')
        utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
        utils.validate_topology()
        master_tablet.create_db('vt_test_keyspace')
        master_tablet.create_db('other_database')
        replica_tablet.create_db('vt_test_keyspace')
        replica_tablet.create_db('other_database')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])

        vtgate_socket_file = environment.tmproot + '/vtgate.sock'
        vtgate_server, vtgate_port = utils.vtgate_start(
            socket_file=vtgate_socket_file)

        master_tablet.start_vttablet()
        replica_tablet.start_vttablet()
        utils.run_vtctl(['SetReadWrite', master_tablet.tablet_alias])
        utils.check_db_read_write(master_tablet.tablet_uid)

        for t in [master_tablet, replica_tablet]:
            t.reset_replication()
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/0',
            master_tablet.tablet_alias
        ],
                        auto_log=True)

        # reset counter so tests don't assert
        tablet.Tablet.tablets_running = 0

        master_start_position = _get_master_current_position()
        master_tablet.mquery('vt_test_keyspace', _create_vt_insert_test)
        master_tablet.mquery('vt_test_keyspace', _create_vt_a)
        master_tablet.mquery('vt_test_keyspace', _create_vt_b)

        utils.run_vtctl(['ReloadSchema', master_tablet.tablet_alias])
        utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias])

    except:
        tearDownModule()
        raise
Beispiel #45
0
    def test_reparent_with_down_slave(self, shard_id='0'):
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

        # create the database so vttablets start, as they are serving
        tablet_62344.create_db('vt_test_keyspace')
        tablet_62044.create_db('vt_test_keyspace')
        tablet_41983.create_db('vt_test_keyspace')
        tablet_31981.create_db('vt_test_keyspace')

        # Start up a master mysql and vttablet
        tablet_62344.init_tablet('master',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)

        # Create a few slaves for testing reparenting.
        tablet_62044.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_31981.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_41983.init_tablet('spare',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)

        # wait for all tablets to start
        for t in [tablet_62344, tablet_62044, tablet_31981]:
            t.wait_for_vttablet_state('SERVING')
        tablet_41983.wait_for_vttablet_state('NOT_SERVING')

        # Recompute the shard layout node - until you do that, it might not be
        # valid.
        utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id])
        utils.validate_topology()

        # Force the slaves to reparent assuming that all the datasets are identical.
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.reset_replication()
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/' + shard_id,
            tablet_62344.tablet_alias
        ])
        utils.validate_topology(ping_tablets=True)
        tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

        utils.wait_procs([tablet_41983.shutdown_mysql()])

        # Perform a graceful reparent operation. It will fail as one tablet is down.
        _, stderr = utils.run_vtctl([
            'PlannedReparentShard', 'test_keyspace/' + shard_id,
            tablet_62044.tablet_alias
        ],
                                    expect_fail=True)
        self.assertIn('TabletManager.SetMaster on test_nj-0000041983 error',
                      stderr)

        # insert data into the new master, check the connected slaves work
        self._populate_vt_insert_test(tablet_62044, 3)
        self._check_vt_insert_test(tablet_31981, 3)
        self._check_vt_insert_test(tablet_62344, 3)

        # restart mysql on the old slave, should still be connecting to the
        # old master
        utils.wait_procs([tablet_41983.start_mysql()])

        utils.pause('check orphan')

        # reparent the tablet (will not start replication, so we have to
        # do it ourselves), then it should catch up on replication really quickly
        utils.run_vtctl(['ReparentTablet', tablet_41983.tablet_alias])
        utils.run_vtctl(['StartSlave', tablet_41983.tablet_alias])

        # wait until it gets the data
        self._check_vt_insert_test(tablet_41983, 3)

        tablet.kill_tablets(
            [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
Beispiel #46
0
    def test_resharding(self):
        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'bad_column',
            '--sharding_column_type', 'bytes', '--split_shard_count', '2',
            'test_keyspace'
        ])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'keyspace_id', 'uint64'
        ],
                        expect_fail=True)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', '-split_shard_count', '4',
            'test_keyspace', 'keyspace_id', keyspace_id_type
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_ny_slave.init_tablet('spare', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
        shard_1_ny_slave.init_tablet('spare', 'test_keyspace', '80-')
        shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

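        # the rebuilt SrvKeyspace should reflect the split_shard_count of 4
        # that was forced above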
        ks = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
        self.assertEqual(ks['SplitShardCount'], 4)

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

        # create databases so vttablet can start behaving normally
        for t in [
                shard_0_master, shard_0_replica, shard_0_ny_slave,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_slave, shard_1_rdonly
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             full_mycnf_args=full_mycnf_args)

        # wait for the tablets
        shard_0_master.wait_for_vttablet_state('SERVING')
        shard_0_replica.wait_for_vttablet_state('SERVING')
        shard_0_ny_slave.wait_for_vttablet_state('NOT_SERVING')  # spare
        shard_1_master.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('SERVING')
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
        shard_1_ny_slave.wait_for_vttablet_state('NOT_SERVING')  # spare
        shard_1_rdonly.wait_for_vttablet_state('SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # create the tables
        self._create_schema()
        self._insert_startup_values()
        self._test_keyrange_constraints()

        # create the split shards
        shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
        shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-c0')
        shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-c0')
        shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
        shard_3_replica.init_tablet('spare', 'test_keyspace', 'c0-')
        shard_3_rdonly.init_tablet('rdonly', 'test_keyspace', 'c0-')

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_3_master.start_vttablet(wait_for_state=None,
                                      target_tablet_type='replica')
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_replica, shard_3_rdonly
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_master, shard_3_replica, shard_3_rdonly
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-c0',
            shard_2_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/c0-',
            shard_3_master.tablet_alias
        ],
                        auto_log=True)

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        if use_clone_worker:
            # the worker will do everything. We test with source_reader_count=10
            # (down from the default of 20) as the connection pool is not big
            # enough for 20. min_table_size_for_split is set to 1 to force a
            # split even on the small table we have.
            utils.run_vtworker([
                '--cell', 'test_nj', '--command_display_interval', '10ms',
                'SplitClone', '--exclude_tables', 'unrelated', '--strategy',
                'populateBlpCheckpoint', '--source_reader_count', '10',
                '--min_table_size_for_split', '1', 'test_keyspace/80-c0'
            ],
                               auto_log=True)
            utils.run_vtctl(
                ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                auto_log=True)

            # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        else:
            # take the snapshot for the split
            utils.run_vtctl([
                'MultiSnapshot', '--spec=80-c0-', '--exclude_tables=unrelated',
                shard_1_slave1.tablet_alias
            ],
                            auto_log=True)

            # the snapshot_copy hook will copy the snapshot files to
            # VTDATAROOT/tmp/... as a test. We want to use these for one half,
            # but not for the other, so we test both scenarios.
            os.unlink(
                os.path.join(
                    environment.tmproot, "snapshot-from-%s-for-%s.tar" %
                    (shard_1_slave1.tablet_alias, "80-c0")))

            # wait for tablet's binlog server service to be enabled after snapshot
            shard_1_slave1.wait_for_binlog_server_state("Enabled")

            # perform the restores: first one from source tablet. We removed the
            # storage backup, so it's coming from the tablet itself.
            # we also delay starting the binlog player, then enable it.
            utils.run_vtctl([
                'ShardMultiRestore',
                '-strategy=populateBlpCheckpoint,dontStartBinlogPlayer',
                'test_keyspace/80-c0', shard_1_slave1.tablet_alias
            ],
                            auto_log=True)

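            # wait until the binlog player on the destination master reports
            # it is blocked on the DontStart flag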
            timeout = 10
            while True:
                shard_2_master_status = shard_2_master.get_status()
                if not "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
                    timeout = utils.wait_step(
                        'shard 2 master has not failed starting yet', timeout)
                    continue
                logging.debug(
                    "shard 2 master is waiting on flag removal, good")
                break

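            # clear the flag in _vt.blp_checkpoint so the binlog player is
            # allowed to start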
            qr = utils.run_vtctl_json([
                'ExecuteFetch', shard_2_master.tablet_alias,
                'update _vt.blp_checkpoint set flags="" where source_shard_uid=0'
            ])
            self.assertEqual(qr['RowsAffected'], 1)

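            # and wait for the binlog player to notice the cleared flag and
            # start replicating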
            timeout = 10
            while True:
                shard_2_master_status = shard_2_master.get_status()
                if "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
                    timeout = utils.wait_step(
                        'shard 2 master has not started replication yet',
                        timeout)
                    continue
                logging.debug("shard 2 master has started replication, good")
                break

            # second restore from storage: to be sure, we stop vttablet, and restart
            # it afterwards
            shard_1_slave1.kill_vttablet()
            utils.run_vtctl([
                'ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                'test_keyspace/c0-', shard_1_slave1.tablet_alias
            ],
                            auto_log=True)
            shard_1_slave1.start_vttablet(wait_for_state=None)
            shard_1_slave1.wait_for_binlog_server_state("Enabled")

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl([
            'ValidateSchemaKeyspace', '--exclude_tables=unrelated',
            'test_keyspace'
        ],
                        auto_log=True)

        # check the binlog players are running and exporting vars
        shard_2_master.wait_for_binlog_player_count(1)
        shard_3_master.wait_for_binlog_player_count(1)
        self._check_binlog_player_vars(shard_2_master)
        self._check_binlog_player_vars(shard_3_master)

        # check that binlog server exported the stats vars
        self._check_binlog_server_vars(shard_1_slave1)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000)
        logging.debug("Checking 80 percent of data is sent quickly")
        self._check_lots_timeout(1000, 80, 5)
        logging.debug("Checking all data goes through eventually")
        self._check_lots_timeout(1000, 100, 20)
        logging.debug("Checking no data was sent the wrong way")
        self._check_lots_not_present(1000)
        self._check_binlog_player_vars(shard_2_master,
                                       seconds_behind_master_max=30)
        self._check_binlog_player_vars(shard_3_master,
                                       seconds_behind_master_max=30)

        # use the vtworker checker to compare the data
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause("Good time to test vtworker for diffs")

        # get status for a destination master tablet, make sure we have it all
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('Binlog player state: Running', shard_2_master_status)
        self.assertIn(
            '<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>',
            shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # start a thread to insert data into shard_1 in the background
        # with current time, and monitor the delay
        insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000,
                                       0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001,
                                       0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
        monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")

        # tests a failover switching serving to a different replica
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')

        # test data goes through again
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000, base=1000)
        logging.debug("Checking 80 percent of data was sent quickly")
        self._check_lots_timeout(1000, 80, 5, base=1000)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        expect_fail=True)

        # check the query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even the health check that is enabled on
        # master 3 should not interfere.
        shard_2_master_vars = utils.get_vars(shard_2_master.port)
        self.assertEqual(shard_2_master_vars['TabletStateName'], 'NOT_SERVING')
        shard_3_master_vars = utils.get_vars(shard_3_master.port)
        self.assertEqual(shard_3_master_vars['TabletStateName'], 'NOT_SERVING')

        # now serve rdonly from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # then serve replica from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-c0 c0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
            auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-c0 c0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl([
            'ReparentShard', 'test_keyspace/80-c0',
            shard_2_replica1.tablet_alias
        ])
        logging.debug(
            "Inserting lots of data on source shard after reparenting")
        self._insert_lots(3000, base=2000)
        logging.debug("Checking 80 percent of data was sent fairly quickly")
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use the vtworker checker to compare the data again
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u",
                      monitor_thread_1.object_name, monitor_thread_1.max_lag,
                      monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
        logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u",
                      monitor_thread_2.object_name, monitor_thread_2.max_lag,
                      monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

        # then serve master from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-c0 c0-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-c0 c0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # check the binlog players are gone now
        shard_2_master.wait_for_binlog_player_count(0)
        shard_3_master.wait_for_binlog_player_count(0)

        # get status for a destination master tablet, make sure it's good
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('No binlog player is running', shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # scrap the original tablets in the original shard
        for t in [
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_slave, shard_1_rdonly
        ]:
            utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
        tablet.kill_tablets([
            shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave,
            shard_1_rdonly
        ])
        for t in [
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_slave, shard_1_rdonly
        ]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                        auto_log=True,
                        expect_fail=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                        auto_log=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                        auto_log=True)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
        if shard['Cells']:
            self.fail("Non-empty Cells record for shard: %s" % str(shard))

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

        # kill everything
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_ny_slave, shard_2_master,
            shard_2_replica1, shard_2_replica2, shard_3_master,
            shard_3_replica, shard_3_rdonly
        ])
Beispiel #47
0
    def _test_reparent_slave_offline(self, shard_id='0'):
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

        # create the database so vttablets start, as they are serving
        tablet_62344.create_db('vt_test_keyspace')
        tablet_62044.create_db('vt_test_keyspace')
        tablet_41983.create_db('vt_test_keyspace')
        tablet_31981.create_db('vt_test_keyspace')

        # Start up a master mysql and vttablet
        tablet_62344.init_tablet('master',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)

        # Create a few slaves for testing reparenting.
        tablet_62044.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_41983.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_31981.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)

        # wait for all tablets to start
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.wait_for_vttablet_state('SERVING')

        # Recompute the shard layout node - until you do that, it might not be
        # valid.
        utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id])
        utils.validate_topology()

        # Force the slaves to reparent assuming that all the datasets are
        # identical.
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.reset_replication()
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/' + shard_id,
            tablet_62344.tablet_alias
        ])
        utils.validate_topology(ping_tablets=True)

        self._check_db_addr(shard_id, 'master', tablet_62344.port)

        # Kill one tablet so we seem offline
        tablet_31981.kill_vttablet()

        # Perform a graceful reparent operation.
        utils.run_vtctl([
            'PlannedReparentShard', 'test_keyspace/' + shard_id,
            tablet_62044.tablet_alias
        ])
        self._check_db_addr(shard_id, 'master', tablet_62044.port)

        tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983])
Beispiel #48
0
  def test_secure(self):
    vtgate_server, vtgate_port = utils.vtgate_start(cache_ttl='0s')

    # start the tablets
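    # Both tablets get the server-side TLS cert/key; the slave's
    # repl_extra_flags additionally point its replication connection at the
    # SSL CA and client certs.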
    shard_0_master.start_vttablet(cert=cert_dir + "/vt-server-cert.pem",
                                  key=cert_dir + "/vt-server-key.pem")
    shard_0_slave.start_vttablet(cert=cert_dir + "/vt-server-cert.pem",
                                 key=cert_dir + "/vt-server-key.pem",
                                 repl_extra_flags={
        'flags': "2048",
        'ssl-ca': cert_dir + "/ca-cert.pem",
        'ssl-cert': cert_dir + "/client-cert.pem",
        'ssl-key': cert_dir + "/client-key.pem",
        })

    # Reparent using SSL
    for t in [shard_0_master, shard_0_slave]:
      t.reset_replication()
    utils.run_vtctl('ReparentShard -force test_keyspace/0 ' + shard_0_master.tablet_alias, auto_log=True)

    # then get the topology and check it
    topo_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                        "test_nj", 30.0)
    topology.read_keyspaces(topo_client)

    shard_0_master_addrs = topology.get_host_port_by_name(topo_client, "test_keyspace.0.master:_vts")
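    # Each returned address is a (host, port, encrypted) tuple; the master
    # must advertise exactly one encrypted _vts endpoint.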
    if len(shard_0_master_addrs) != 1:
      self.fail('topology.get_host_port_by_name failed for "test_keyspace.0.master:_vts", got: %s' % " ".join(["%s:%u(%s)" % (h, p, str(e)) for (h, p, e) in shard_0_master_addrs]))
    if not shard_0_master_addrs[0][2]:
      self.fail('topology.get_host_port_by_name for "test_keyspace.0.master:_vts" did not return an encrypted address')
    logging.debug("shard 0 master addrs: %s", " ".join(["%s:%u(%s)" % (h, p, str(e)) for (h, p, e) in shard_0_master_addrs]))

    # make sure asking for optionally secure connections works too
    auto_addrs = topology.get_host_port_by_name(topo_client, "test_keyspace.0.master:_vtocc", encrypted=True)
    if auto_addrs != shard_0_master_addrs:
      self.fail('topology.get_host_port_by_name doesn\'t resolve encrypted addresses properly: %s != %s' % (str(shard_0_master_addrs), str(auto_addrs)))

    # try to connect with regular client
    try:
      conn = tablet3.TabletConnection("%s:%u" % (shard_0_master_addrs[0][0], shard_0_master_addrs[0][1]),
                                      "", "test_keyspace", "0", 10.0)
      conn.dial()
      self.fail("No exception raised to secure port")
    except dbexceptions.FatalError as e:
      if not e.args[0][0].startswith('Unexpected EOF in handshake to'):
        self.fail("Unexpected exception: %s" % str(e))

    sconn = utils.get_vars(shard_0_master.port)["SecureConnections"]
    if sconn != 0:
      self.fail("unexpected conns %s" % sconn)

    # connect to encrypted port
    conn = tablet3.TabletConnection("%s:%u" % (shard_0_master_addrs[0][0], shard_0_master_addrs[0][1]),
                                    "", "test_keyspace", "0", 5.0, encrypted=True)
    conn.dial()
    (results, rowcount, lastrowid, fields) = conn._execute("select 1 from dual", {})
    self.assertEqual(results, [(1,),], 'wrong conn._execute output: %s' % str(results))

    sconn = utils.get_vars(shard_0_master.port)["SecureConnections"]
    if sconn != 1:
      self.fail("unexpected conns %s" % sconn)
    saccept = utils.get_vars(shard_0_master.port)["SecureAccepts"]
    if saccept == 0:
      self.fail("unexpected accepts %s" % saccept)

    # trigger a time out on a secure connection, see what exception we get
    try:
      conn._execute("select sleep(100) from dual", {})
      self.fail("No timeout exception")
    except dbexceptions.TimeoutError as e:
      logging.debug("Got the right exception for SSL timeout: %s", str(e))

    # start a vtgate to connect to that tablet
    gate_proc, gate_port, gate_secure_port = utils.vtgate_start(
        tablet_bson_encrypted=True,
        cert=cert_dir + "/vt-server-cert.pem",
        key=cert_dir + "/vt-server-key.pem")

    # try to connect to vtgate with regular client
    timeout = 2.0
    try:
      conn = vtgatev2.connect(["localhost:%s" % (gate_secure_port),],
                               timeout)
      self.fail("No exception raised to VTGate secure port")
    except dbexceptions.OperationalError as e:
      exception_type = e.args[2]
      exception_msg = str(e.args[2][0][0])
      self.assertIsInstance(exception_type, dbexceptions.FatalError,
                            "unexpected exception type")
      if not exception_msg.startswith('Unexpected EOF in handshake to'):
        self.fail("Unexpected exception message: %s" % exception_msg)

    sconn = utils.get_vars(gate_port)["SecureConnections"]
    if sconn != 0:
      self.fail("unexpected conns %s" % sconn)

    # connect to vtgate with encrypted port
    conn = vtgatev2.connect(["localhost:%s" % (gate_secure_port),],
                             timeout, encrypted=True)
    (results, rowcount, lastrowid, fields) = conn._execute(
        "select 1 from dual",
        {},
        "test_keyspace",
        "master",
        keyranges=[keyrange.KeyRange(keyrange_constants.NON_PARTIAL_KEYRANGE),])
    self.assertEqual(rowcount, 1, "want 1, got %d" % (rowcount))
    self.assertEqual(len(fields), 1, "want 1, got %d" % (len(fields)))
    self.assertEqual(results, [(1,),], 'wrong conn._execute output: %s' % str(results))

    sconn = utils.get_vars(gate_port)["SecureConnections"]
    if sconn != 1:
      self.fail("unexpected conns %s" % sconn)
    saccept = utils.get_vars(gate_port)["SecureAccepts"]
    if saccept == 0:
      self.fail("unexpected accepts %s" % saccept)

    # trigger a time out on a vtgate secure connection, see what exception we get
    try:
      conn._execute("select sleep(4) from dual",
                    {},
                    "test_keyspace",
                    "master",
                    keyranges=[keyrange.KeyRange(keyrange_constants.NON_PARTIAL_KEYRANGE),])
      self.fail("No timeout exception")
    except dbexceptions.TimeoutError as e:
      logging.debug("Got the right exception for SSL timeout: %s", str(e))
    conn.close()
    utils.vtgate_kill(gate_proc)

    # kill everything
    utils.vtgate_kill(vtgate_server)
Example #49
  def test_reparent_graceful(self):
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    self._test_reparent_graceful('0')
Example #50
    def _test_reparent_from_outside(self, brutal=False):
        """This test will start a master and 3 slaves.

    Then:
    - one slave will be the new master
    - one slave will be reparented to that new master
    - one slave will be busted and dead in the water
    and we'll call TabletExternallyReparented.

    Args:
      brutal: kills the old master first
    """
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

        # create the database so vttablets start, as they are serving
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.create_db('vt_test_keyspace')

        # Start up a master mysql and vttablet
        tablet_62344.init_tablet('master',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)

        # Create a few slaves for testing reparenting.
        tablet_62044.init_tablet('replica',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)
        tablet_41983.init_tablet('replica',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)
        tablet_31981.init_tablet('replica',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)

        # wait for all tablets to start
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.wait_for_vttablet_state('SERVING')

        # Reparent as a starting point
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.reset_replication()
        utils.run_vtctl(
            ['InitShardMaster', 'test_keyspace/0', tablet_62344.tablet_alias],
            auto_log=True)

        # now manually reparent 1 out of 2 tablets
        # 62044 will be the new master
        # 31981 won't be re-parented, so it will be busted
        tablet_62044.mquery('', mysql_flavor().promote_slave_commands())
        new_pos = mysql_flavor().master_position(tablet_62044)
        logging.debug('New master position: %s', str(new_pos))
        # Use 'localhost' as hostname because Travis CI worker hostnames
        # are too long for MySQL replication.
        change_master_cmds = mysql_flavor().change_master_commands(
            'localhost', tablet_62044.mysql_port, new_pos)

        # 62344 will now be a slave of 62044
        tablet_62344.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                            change_master_cmds + ['START SLAVE'])

        # 41983 will be a slave of 62044
        tablet_41983.mquery('', ['STOP SLAVE'] + change_master_cmds +
                            ['START SLAVE'])

        # in brutal mode, we kill the old master first
        # and delete its tablet record
        if brutal:
            tablet_62344.kill_vttablet()
            utils.run_vtctl(
                ['DeleteTablet', '-allow_master', tablet_62344.tablet_alias],
                auto_log=True)

        base_time = time.time()
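        # base_time is passed to the check helper below to verify that the
        # topology records were updated after the external reparent.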

        # update topology with the new server
        utils.run_vtctl(
            ['TabletExternallyReparented', tablet_62044.tablet_alias],
            mode=utils.VTCTL_VTCTL,
            auto_log=True)

        self._test_reparent_from_outside_check(brutal, base_time)

        # RebuildReplicationGraph will rebuild the topo data from
        # the tablet records. It is an emergency command only.
        utils.run_vtctl(
            ['RebuildReplicationGraph', 'test_nj', 'test_keyspace'])

        self._test_reparent_from_outside_check(brutal, base_time)

        if not brutal:
            tablet_62344.kill_vttablet()
        tablet.kill_tablets([tablet_31981, tablet_62044, tablet_41983])
Example #51
    def test_reparent_cross_cell(self, shard_id='0'):
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

        # create the database so vttablets start, as they are serving
        tablet_62344.create_db('vt_test_keyspace')
        tablet_62044.create_db('vt_test_keyspace')
        tablet_41983.create_db('vt_test_keyspace')
        tablet_31981.create_db('vt_test_keyspace')

        # Start up a master mysql and vttablet
        tablet_62344.init_tablet('master',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
        self.assertEqual(
            shard['cells'], ['test_nj'],
            'wrong list of cells in Shard: %s' % str(shard['cells']))

        # Create a few slaves for testing reparenting.
        tablet_62044.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_41983.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_31981.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.wait_for_vttablet_state('SERVING')
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
        self.assertEqual(
            shard['cells'], ['test_nj', 'test_ny'],
            'wrong list of cells in Shard: %s' % str(shard['cells']))

        # Recompute the shard layout node - until you do that, it might not be
        # valid.
        utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id])
        utils.validate_topology()

        # Force the slaves to reparent assuming that all the datasets are
        # identical.
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.reset_replication()
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/' + shard_id,
            tablet_62344.tablet_alias
        ],
                        auto_log=True)
        utils.validate_topology(ping_tablets=True)

        self._check_db_addr(shard_id, 'master', tablet_62344.port)

        # Verify MasterCell is properly set
        self._check_master_cell('test_nj', shard_id, 'test_nj')
        self._check_master_cell('test_ny', shard_id, 'test_nj')

        # Perform a graceful reparent operation to another cell.
        utils.pause('test_reparent_cross_cell PlannedReparentShard')
        utils.run_vtctl([
            'PlannedReparentShard', 'test_keyspace/' + shard_id,
            tablet_31981.tablet_alias
        ],
                        auto_log=True)
        utils.validate_topology()

        self._check_db_addr(shard_id,
                            'master',
                            tablet_31981.port,
                            cell='test_ny')

        # Verify MasterCell is set to new cell.
        self._check_master_cell('test_nj', shard_id, 'test_ny')
        self._check_master_cell('test_ny', shard_id, 'test_ny')

        tablet.kill_tablets(
            [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
Example #52
    def _test_reparent_graceful(self, shard_id):
        # create the database so vttablets start, as they are serving
        tablet_62344.create_db('vt_test_keyspace')
        tablet_62044.create_db('vt_test_keyspace')
        tablet_41983.create_db('vt_test_keyspace')
        tablet_31981.create_db('vt_test_keyspace')

        # Start up a master mysql and vttablet
        tablet_62344.init_tablet('master',
                                 'test_keyspace',
                                 shard_id,
                                 start=True)
        if environment.topo_server().flavor() == 'zookeeper':
            shard = utils.run_vtctl_json(
                ['GetShard', 'test_keyspace/' + shard_id])
            self.assertEqual(
                shard['cells'], ['test_nj'],
                'wrong list of cells in Shard: %s' % str(shard['cells']))

        # Create a few slaves for testing reparenting.
        tablet_62044.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_41983.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_31981.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        for t in [tablet_62044, tablet_41983, tablet_31981]:
            t.wait_for_vttablet_state('SERVING')
        if environment.topo_server().flavor() == 'zookeeper':
            shard = utils.run_vtctl_json(
                ['GetShard', 'test_keyspace/' + shard_id])
            self.assertEqual(
                shard['cells'], ['test_nj', 'test_ny'],
                'wrong list of cells in Shard: %s' % str(shard['cells']))

        # Recompute the shard layout node - until you do that, it might not be
        # valid.
        utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id])
        utils.validate_topology()

        # Force the slaves to reparent assuming that all the datasets are
        # identical.
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.reset_replication()
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/' + shard_id,
            tablet_62344.tablet_alias
        ])
        utils.validate_topology(ping_tablets=True)
        tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

        self._check_db_addr(shard_id, 'master', tablet_62344.port)

        # Verify MasterCell is set to new cell.
        self._check_master_cell('test_nj', shard_id, 'test_nj')
        self._check_master_cell('test_ny', shard_id, 'test_nj')

        # Convert two replicas to spare. That should leave only one node
        # serving traffic, but they still need to appear in the replication
        # graph.
        utils.run_vtctl(
            ['ChangeSlaveType', tablet_41983.tablet_alias, 'spare'])
        utils.run_vtctl(
            ['ChangeSlaveType', tablet_31981.tablet_alias, 'spare'])
        utils.validate_topology()
        self._check_db_addr(shard_id, 'replica', tablet_62044.port)

        # Run this to make sure it succeeds.
        utils.run_vtctl(
            ['ShardReplicationPositions', 'test_keyspace/' + shard_id],
            stdout=utils.devnull)

        # Perform a graceful reparent operation.
        utils.pause('_test_reparent_graceful PlannedReparentShard')
        utils.run_vtctl([
            'PlannedReparentShard', 'test_keyspace/' + shard_id,
            tablet_62044.tablet_alias
        ],
                        auto_log=True)
        utils.validate_topology()

        self._check_db_addr(shard_id, 'master', tablet_62044.port)

        # insert data into the new master, check the connected slaves work
        self._populate_vt_insert_test(tablet_62044, 1)
        self._check_vt_insert_test(tablet_41983, 1)
        self._check_vt_insert_test(tablet_62344, 1)

        # Verify MasterCell is set to new cell.
        self._check_master_cell('test_nj', shard_id, 'test_nj')
        self._check_master_cell('test_ny', shard_id, 'test_nj')

        tablet.kill_tablets(
            [tablet_62344, tablet_62044, tablet_41983, tablet_31981])

        # Test address correction.
        new_port = environment.reserve_ports(1)
        tablet_62044.start_vttablet(port=new_port)

        # Wait until the new address registers.
        timeout = 30.0
        while True:
            try:
                self._check_db_addr(shard_id, 'master', new_port)
                break
            except protocols_flavor().client_error_exception_type():
                timeout = utils.wait_step('waiting for new port to register',
                                          timeout,
                                          sleep_time=0.1)

        tablet_62044.kill_vttablet()
Example #53
  def test_vertical_split(self):
    utils.run_vtctl(['CopySchemaShard', '--tables', '/moving/,view1',
                     source_rdonly1.tablet_alias, 'destination_keyspace/0'],
                    auto_log=True)

    utils.run_vtworker(['--cell', 'test_nj',
                        '--command_display_interval', '10ms',
                        '--use_v3_resharding_mode=false',
                        'VerticalSplitClone',
                        '--tables', '/moving/,view1',
                        '--chunk_count', '10',
                        '--min_rows_per_chunk', '1',
                        '--min_healthy_rdonly_tablets', '1',
                        'destination_keyspace/0'],
                       auto_log=True)

    # test Cancel first
    utils.run_vtctl(['CancelResharding', 'destination_keyspace/0'], auto_log=True)
    self.check_no_binlog_player(destination_master)

    # redo VerticalSplitClone
    utils.run_vtworker(['--cell', 'test_nj',
                        '--command_display_interval', '10ms',
                        '--use_v3_resharding_mode=false',
                        'VerticalSplitClone',
                        '--tables', '/moving/,view1',
                        '--chunk_count', '10',
                        '--min_rows_per_chunk', '1',
                        '--min_healthy_rdonly_tablets', '1',
                        'destination_keyspace/0'],
                       auto_log=True)

    # check values are present
    self._check_values(destination_master, 'vt_destination_keyspace', 'moving1',
                       self.moving1_first, 100)
    self._check_values(destination_master, 'vt_destination_keyspace', 'moving2',
                       self.moving2_first, 100)
    self._check_values(destination_master, 'vt_destination_keyspace', 'view1',
                       self.moving1_first, 100)
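    # moving3_no_pk has no primary key, so it is only expected on the
    # destination when row-based replication (use_rbr) is enabled.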
    if base_sharding.use_rbr:
      self._check_values(destination_master, 'vt_destination_keyspace',
                         'moving3_no_pk', self.moving3_no_pk_first, 100)

    # Verify vreplication table entries
    result = destination_master.mquery('_vt', 'select * from vreplication')
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0][1], 'SplitClone')
    self.assertEqual(result[0][2],
      'keyspace:"source_keyspace" shard:"0" tables:"/moving/" tables:"view1" ')

    # check the binlog player is running and exporting vars
    self.check_destination_master(destination_master, ['source_keyspace/0'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(source_replica, horizontal=False)

    # add values to source, make sure they're replicated
    moving1_first_add1 = self._insert_values('moving1', 100)
    _ = self._insert_values('staying1', 100)
    moving2_first_add1 = self._insert_values('moving2', 100)
    self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                               'moving1', moving1_first_add1, 100)
    self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                               'moving2', moving2_first_add1, 100)
    self.check_binlog_player_vars(destination_master, ['source_keyspace/0'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(source_replica, horizontal=False,
                                  min_statements=100, min_transactions=100)

    # use vtworker to compare the data
    logging.debug('Running vtworker VerticalSplitDiff')
    utils.run_vtworker(['-cell', 'test_nj',
                        '--use_v3_resharding_mode=false',
                        'VerticalSplitDiff',
                        '--min_healthy_rdonly_tablets', '1',
                        'destination_keyspace/0'], auto_log=True)

    utils.pause('Good time to test vtworker for diffs')

    # get status for destination master tablet, make sure we have it all
    self.check_running_binlog_player(destination_master, 700, 300,
                                     extra_text='moving')

    # check query service is off on destination master, as filtered
    # replication is enabled. Even health check should not interfere.
    destination_master_vars = utils.get_vars(destination_master.port)
    self.assertEqual(destination_master_vars['TabletStateName'], 'NOT_SERVING')

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'],
                    expect_fail=True)

    # migrate rdonly only in test_ny cell, make sure nothing is migrated
    # in test_nj
    utils.run_vtctl(['MigrateServedFrom', '--cells=test_ny',
                     'destination_keyspace/0', 'rdonly'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                             'ServedFrom(rdonly): source_keyspace\n'
                             'ServedFrom(replica): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, None)
    self._check_blacklisted_tables(source_rdonly1, None)
    self._check_blacklisted_tables(source_rdonly2, None)

    # migrate test_nj only, using command line manual fix command,
    # and restore it back.
    keyspace_json = utils.run_vtctl_json(
        ['GetKeyspace', 'destination_keyspace'])
    found = False
    for ksf in keyspace_json['served_froms']:
      if ksf['tablet_type'] == topodata_pb2.RDONLY:
        found = True
        self.assertEqual(ksf['cells'], ['test_nj'])
    self.assertTrue(found)
    utils.run_vtctl(['SetKeyspaceServedFrom', '-source=source_keyspace',
                     '-remove', '-cells=test_nj', 'destination_keyspace',
                     'rdonly'], auto_log=True)
    keyspace_json = utils.run_vtctl_json(
        ['GetKeyspace', 'destination_keyspace'])
    found = False
    for ksf in keyspace_json['served_froms']:
      if ksf['tablet_type'] == topodata_pb2.RDONLY:
        found = True
    self.assertFalse(found)
    utils.run_vtctl(['SetKeyspaceServedFrom', '-source=source_keyspace',
                     'destination_keyspace', 'rdonly'],
                    auto_log=True)
    keyspace_json = utils.run_vtctl_json(
        ['GetKeyspace', 'destination_keyspace'])
    found = False
    for ksf in keyspace_json['served_froms']:
      if ksf['tablet_type'] == topodata_pb2.RDONLY:
        found = True
        self.assertTrue('cells' not in ksf or not ksf['cells'])
    self.assertTrue(found)

    # now serve rdonly from the destination shards
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'rdonly'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                             'ServedFrom(replica): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, None)
    self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
    self._check_client_conn_redirection(
        'destination_keyspace',
        ['master', 'replica'], ['moving1', 'moving2'])

    # then serve replica from the destination shards
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
    self._check_client_conn_redirection(
        'destination_keyspace',
        ['master'], ['moving1', 'moving2'])

    # move replica back and forth
    utils.run_vtctl(['MigrateServedFrom', '-reverse',
                     'destination_keyspace/0', 'replica'], auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                             'ServedFrom(replica): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, None)
    self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
    self._check_client_conn_redirection(
        'destination_keyspace',
        ['master'], ['moving1', 'moving2'])

    # Cancel should fail now
    utils.run_vtctl(['CancelResharding', 'destination_keyspace/0'],
                    auto_log=True, expect_fail=True)

    # then serve master from the destination shards
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'],
                    auto_log=True)
    self._check_srv_keyspace('')
    self._check_blacklisted_tables(source_master, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_replica, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
    self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])

    # check the binlog player is gone now
    self.check_no_binlog_player(destination_master)

    # check the stats are correct
    self._check_stats()

    # now remove the tables on the source shard. The blacklisted tables
    # in the source shard won't match any table; make sure that works.
    utils.run_vtctl(['ApplySchema',
                     '-sql=drop view view1',
                     'source_keyspace'],
                    auto_log=True)
    for t in ['moving1', 'moving2']:
      utils.run_vtctl(['ApplySchema',
                       '-sql=drop table %s' % (t),
                       'source_keyspace'],
                      auto_log=True)
    for t in [source_master, source_replica, source_rdonly1, source_rdonly2]:
      utils.run_vtctl(['ReloadSchema', t.tablet_alias])
    qr = source_master.execute('select count(1) from staying1')
    self.assertEqual(len(qr['rows']), 1,
                     'cannot read staying1: got %s' % str(qr))

    # test SetShardTabletControl
    self._verify_vtctl_set_shard_tablet_control()
Example #54
  def test_reparent_graceful_range_based(self):
    utils.run_vtctl([
        'CreateKeyspace', '--sharding_column_name', 'keyspace_id',
        '--sharding_column_type', 'uint64', 'test_keyspace'
    ])
    self._test_reparent_graceful('0000000000000000-ffffffffffffffff')
Example #55
    def _test_vtctl_snapshot_restore(self, server_mode):
        if server_mode:
            snapshot_flags = ['-server-mode', '-concurrency=8']
            restore_flags = ['-dont-wait-for-slave-start']
        else:
            snapshot_flags = ['-concurrency=4']
            restore_flags = []

        # Start up a master mysql and vttablet
        utils.run_vtctl(['CreateKeyspace', 'snapshot_test'])

        tablet_62344.init_tablet('master', 'snapshot_test', '0')
        utils.run_vtctl(['RebuildShardGraph', 'snapshot_test/0'])
        utils.validate_topology()

        tablet_62344.populate('vt_snapshot_test', self._create_vt_insert_test,
                              self._populate_vt_insert_test)

        tablet_62044.create_db('vt_snapshot_test')

        tablet_62344.start_vttablet()

        # Need to force snapshot since this is a master db.
        out, err = utils.run_vtctl(['Snapshot', '-force'] + snapshot_flags +
                                   [tablet_62344.tablet_alias],
                                   trap_output=True)
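        # The Snapshot command echoes its results as 'Name: value' lines on
        # stderr; collect the fields needed for the Restore call below.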
        results = {}
        for name in [
                'Manifest', 'ParentAlias', 'SlaveStartRequired', 'ReadOnly',
                'OriginalType'
        ]:
            sepPos = err.find(name + ": ")
            if sepPos != -1:
                results[name] = err[sepPos + len(name) + 2:].splitlines()[0]
        if "Manifest" not in results:
            self.fail("Snapshot didn't echo Manifest file: %s" % str(err))
        if "ParentAlias" not in results:
            self.fail("Snapshot didn't echo ParentAlias: %s" % str(err))
        utils.pause("snapshot finished: " + results['Manifest'] + " " +
                    results['ParentAlias'])
        if server_mode:
            if "SlaveStartRequired" not in results:
                self.fail("Snapshot didn't echo SlaveStartRequired: %s" % err)
            if "ReadOnly" not in results:
                self.fail("Snapshot didn't echo ReadOnly %s" % err)
            if "OriginalType" not in results:
                self.fail("Snapshot didn't echo OriginalType: %s" % err)
            if (results['SlaveStartRequired'] != 'false'
                    or results['ReadOnly'] != 'true'
                    or results['OriginalType'] != 'master'):
                self.fail("Bad values returned by Snapshot: %s" % err)
        tablet_62044.init_tablet('idle', start=True)

        # do not specify a MANIFEST, see if 'default' works
        call(["touch", "/tmp/vtSimulateFetchFailures"])
        utils.run_vtctl(
            ['Restore', '-fetch-concurrency=2', '-fetch-retry-count=4'] +
            restore_flags + [
                tablet_62344.tablet_alias, 'default',
                tablet_62044.tablet_alias, results['ParentAlias']
            ],
            auto_log=True)
        utils.pause("restore finished")

        tablet_62044.assert_table_count('vt_snapshot_test', 'vt_insert_test',
                                        4)

        utils.validate_topology()

        # in server_mode, get the server out of it and check it
        if server_mode:
            utils.run_vtctl([
                'SnapshotSourceEnd', tablet_62344.tablet_alias,
                results['OriginalType']
            ],
                            auto_log=True)
            tablet_62344.assert_table_count('vt_snapshot_test',
                                            'vt_insert_test', 4)
            utils.validate_topology()

        tablet.kill_tablets([tablet_62344, tablet_62044])
Example #56
    def test_reparent_down_master(self):
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

        # create the database so vttablets start, as they are serving
        tablet_62344.create_db('vt_test_keyspace')
        tablet_62044.create_db('vt_test_keyspace')
        tablet_41983.create_db('vt_test_keyspace')
        tablet_31981.create_db('vt_test_keyspace')

        # Start up a master mysql and vttablet
        tablet_62344.init_tablet('master',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)

        # Create a few slaves for testing reparenting.
        tablet_62044.init_tablet('replica',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)
        tablet_41983.init_tablet('replica',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)
        tablet_31981.init_tablet('replica',
                                 'test_keyspace',
                                 '0',
                                 start=True,
                                 wait_for_start=False)

        # wait for all tablets to start
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.wait_for_vttablet_state('SERVING')

        # Recompute the shard layout node - until you do that, it might not be
        # valid.
        utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
        utils.validate_topology()

        # Force the slaves to reparent assuming that all the datasets are
        # identical.
        for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
            t.reset_replication()
        utils.run_vtctl(
            ['InitShardMaster', 'test_keyspace/0', tablet_62344.tablet_alias],
            auto_log=True)
        utils.validate_topology()
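        # Create the test table on the master so that replicated inserts can
        # be verified on the slaves after the reparent.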
        tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

        # Make the current master agent and database unavailable.
        tablet_62344.kill_vttablet()
        tablet_62344.shutdown_mysql().wait()

        self._check_db_addr('0', 'master', tablet_62344.port)

        # Perform a planned reparent operation, will try to contact
        # the current master and fail somewhat quickly
        _, stderr = utils.run_vtctl([
            '-wait-time', '5s', 'PlannedReparentShard', 'test_keyspace/0',
            tablet_62044.tablet_alias
        ],
                                    expect_fail=True)
        self.assertIn('DemoteMaster failed', stderr)

        # Run forced reparent operation, this should now proceed unimpeded.
        utils.run_vtctl([
            'EmergencyReparentShard', 'test_keyspace/0',
            tablet_62044.tablet_alias
        ],
                        auto_log=True)

        utils.validate_topology()
        self._check_db_addr('0', 'master', tablet_62044.port)

        # insert data into the new master, check the connected slaves work
        self._populate_vt_insert_test(tablet_62044, 2)
        self._check_vt_insert_test(tablet_41983, 2)
        self._check_vt_insert_test(tablet_31981, 2)

        tablet.kill_tablets([tablet_62044, tablet_41983, tablet_31981])

        # so the other tests don't have any surprise
        tablet_62344.start_mysql().wait()
Example #57
    def verify_successful_worker_copy_with_reparent(self, mysql_down=False):
        """Verifies that vtworker can successfully copy data for a SplitClone.

    Order of operations:
    1. Run a background vtworker
    2. Wait until the worker successfully resolves the destination masters.
    3. Reparent the destination tablets
    4. Wait until the vtworker copy is finished
    5. Verify that the worker was forced to reresolve topology and retry writes
      due to the reparent.
    6. Verify that the data was copied successfully to both new shards

    Args:
      mysql_down: boolean. If True, we take down the MySQL instances on the
        destination masters at first, then bring them back and reparent away.

    Raises:
      AssertionError if things didn't go as expected.
    """
        if mysql_down:
            logging.debug('Shutting down mysqld on destination masters.')
            utils.wait_procs([
                shard_0_master.shutdown_mysql(),
                shard_1_master.shutdown_mysql()
            ])

        worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
            ['--cell', 'test_nj'], auto_log=True)

        # --max_tps is only specified to enable the throttler and ensure that the
        # code is executed. But the intent here is not to throttle the test, hence
        # the rate limit is set very high.
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--source_reader_count', '1',
            '--destination_writer_count', '1', '--write_query_max_rows', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/0'
        ], worker_rpc_port)

        if mysql_down:
            # If MySQL is down, we wait until vtworker retried at least once to make
            # sure it reached the point where a write failed due to MySQL being down.
            # There should be two retries at least, one for each destination shard.
            utils.poll_for_vars(
                'vtworker',
                worker_port,
                'WorkerRetryCount >= 2',
                condition_fn=lambda v: v.get('WorkerRetryCount') >= 2)
            logging.debug(
                'Worker has retried at least twice, starting reparent now')

            # vtworker is blocked at this point. This is a good time to test that its
            # throttler server is reacting to RPCs.
            self.check_binlog_throttler(
                'localhost:%d' % worker_rpc_port,
                ['test_keyspace/-80', 'test_keyspace/80-'], 9999)

            # Bring back masters. Since we test with semi-sync now, we need at least
            # one replica for the new master. This test is already quite expensive,
            # so we bring back the old master as a replica rather than having a third
            # replica up the whole time.
            logging.debug('Restarting mysqld on destination masters')
            utils.wait_procs(
                [shard_0_master.start_mysql(),
                 shard_1_master.start_mysql()])

            # Reparent away from the old masters.
            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/-80',
                shard_0_replica.tablet_alias
            ],
                            auto_log=True)
            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/80-',
                shard_1_replica.tablet_alias
            ],
                            auto_log=True)

        else:
            # NOTE: There is a race condition around this:
            #   It's possible that the SplitClone vtworker command finishes before the
            #   PlannedReparentShard vtctl command, which we start below, succeeds.
            #   Then the test would fail because vtworker did not have to retry.
            #
            # To workaround this, the test takes a parameter to increase the number of
            # rows that the worker has to copy (with the idea being to slow the worker
            # down).
            # You should choose a value for num_insert_rows, such that this test
            # passes for your environment (trial-and-error...)
            # Make sure that vtworker got past the point where it picked a master
            # for each destination shard ("finding targets" state).
            utils.poll_for_vars(
                'vtworker',
                worker_port,
                'WorkerState == cloning the data (online)',
                condition_fn=lambda v: v.get('WorkerState') == 'cloning the'
                ' data (online)')
            logging.debug('Worker is in copy state, starting reparent now')

            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/-80',
                shard_0_replica.tablet_alias
            ],
                            auto_log=True)
            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/80-',
                shard_1_replica.tablet_alias
            ],
                            auto_log=True)

        utils.wait_procs([workerclient_proc])

        # Verify that we were forced to re-resolve and retry.
        worker_vars = utils.get_vars(worker_port)
        # There should be two retries at least, one for each destination shard.
        self.assertGreater(worker_vars['WorkerRetryCount'], 1)
        self.assertNotEqual(worker_vars['WorkerRetryCount'], {},
                            "expected vtworker to retry, but it didn't")
        utils.kill_sub_process(worker_proc, soft=True)

        # Make sure that everything is caught up to the same replication point
        self.run_split_diff('test_keyspace/-80', all_shard_tablets,
                            shard_0_tablets)
        self.run_split_diff('test_keyspace/80-', all_shard_tablets,
                            shard_1_tablets)

        self.assert_shard_data_equal(0, shard_master, shard_0_tablets.replica)
        self.assert_shard_data_equal(1, shard_master, shard_1_tablets.replica)
Example #58
  def _init_keyspaces_and_tablets(self):
    utils.run_vtctl(['CreateKeyspace', 'source_keyspace'])
    utils.run_vtctl(
        ['CreateKeyspace', '--served_from',
         'master:source_keyspace,replica:source_keyspace,rdonly:'
         'source_keyspace',
         'destination_keyspace'])

    source_master.init_tablet(
        'replica',
        keyspace='source_keyspace',
        shard='0',
        tablet_index=0)
    source_replica.init_tablet(
        'replica',
        keyspace='source_keyspace',
        shard='0',
        tablet_index=1)
    source_rdonly1.init_tablet(
        'rdonly',
        keyspace='source_keyspace',
        shard='0',
        tablet_index=2)
    source_rdonly2.init_tablet(
        'rdonly',
        keyspace='source_keyspace',
        shard='0',
        tablet_index=3)
    destination_master.init_tablet(
        'replica',
        keyspace='destination_keyspace',
        shard='0',
        tablet_index=0)
    destination_replica.init_tablet(
        'replica',
        keyspace='destination_keyspace',
        shard='0',
        tablet_index=1)
    destination_rdonly1.init_tablet(
        'rdonly',
        keyspace='destination_keyspace',
        shard='0',
        tablet_index=2)
    destination_rdonly2.init_tablet(
        'rdonly',
        keyspace='destination_keyspace',
        shard='0',
        tablet_index=3)

    utils.run_vtctl(
        ['RebuildKeyspaceGraph', 'source_keyspace'], auto_log=True)
    utils.run_vtctl(
        ['RebuildKeyspaceGraph', 'destination_keyspace'], auto_log=True)

    self._create_source_schema()

    for t in [source_master, source_replica,
              destination_master, destination_replica]:
      t.start_vttablet(wait_for_state=None)
    for t in [source_rdonly1, source_rdonly2,
              destination_rdonly1, destination_rdonly2]:
      t.start_vttablet(wait_for_state=None)

    # wait for the tablets
    master_tablets = [source_master, destination_master]
    replica_tablets = [
        source_replica, source_rdonly1, source_rdonly2,
        destination_replica, destination_rdonly1,
        destination_rdonly2]
    for t in master_tablets + replica_tablets:
      t.wait_for_vttablet_state('NOT_SERVING')

    # check SrvKeyspace
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                             'ServedFrom(rdonly): source_keyspace\n'
                             'ServedFrom(replica): source_keyspace\n')

    # reparent to make the tablets work (we use health check, fix their types)
    utils.run_vtctl(['InitShardMaster', '-force', 'source_keyspace/0',
                     source_master.tablet_alias], auto_log=True)
    source_master.tablet_type = 'master'
    utils.run_vtctl(['InitShardMaster', '-force', 'destination_keyspace/0',
                     destination_master.tablet_alias], auto_log=True)
    destination_master.tablet_type = 'master'

    for t in [source_replica, destination_replica]:
      utils.wait_for_tablet_type(t.tablet_alias, 'replica')
    for t in [source_rdonly1, source_rdonly2,
              destination_rdonly1, destination_rdonly2]:
      utils.wait_for_tablet_type(t.tablet_alias, 'rdonly')

    for t in master_tablets + replica_tablets:
      t.wait_for_vttablet_state('SERVING')
Example #59
    def run_shard_tablets(self, shard_name, shard_tablets, create_table=True):
        """Handles all the necessary work for initially running a shard's tablets.

    This encompasses the following steps:
      1. (optional) Create db
      2. Starting vttablets and let themselves init them
      3. Waiting for the appropriate vttablet state
      4. Force reparent to the master tablet
      5. RebuildKeyspaceGraph
      7. (optional) Running initial schema setup

    Args:
      shard_name: the name of the shard to start tablets in
      shard_tablets: an instance of ShardTablets for the given shard
      create_table: boolean, True iff we should create a table on the tablets
    """
        # Start tablets.
        #
        # NOTE: The future master has to be started with type 'replica'.
        shard_tablets.master.start_vttablet(wait_for_state=None,
                                            init_tablet_type='replica',
                                            init_keyspace='test_keyspace',
                                            init_shard=shard_name)
        for t in shard_tablets.replicas:
            t.start_vttablet(wait_for_state=None,
                             init_tablet_type='replica',
                             init_keyspace='test_keyspace',
                             init_shard=shard_name)
        for t in shard_tablets.rdonlys:
            t.start_vttablet(wait_for_state=None,
                             init_tablet_type='rdonly',
                             init_keyspace='test_keyspace',
                             init_shard=shard_name)

        # Block until tablets are up and we can enable replication.
        # All tables should be NOT_SERVING until we run InitShardMaster.
        for t in shard_tablets.all_tablets:
            t.wait_for_vttablet_state('NOT_SERVING')

        # Reparent to choose an initial master and enable replication.
        utils.run_vtctl([
            'InitShardMaster', '-force',
            'test_keyspace/%s' % shard_name, shard_tablets.master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # Enforce a health check instead of waiting for the next periodic one.
        # (saves up to 1 second execution time on average)
        for t in shard_tablets.replicas:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
        for t in shard_tablets.rdonlys:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

        # Wait for tablet state to change after starting all tablets. This allows
        # us to start all tablets at once, instead of sequentially waiting.
        # NOTE: Replication has to be enabled first or the health check will
        #       set a replica or rdonly tablet back to NOT_SERVING.
        for t in shard_tablets.all_tablets:
            t.wait_for_vttablet_state('SERVING')

        create_table_sql = ('create table worker_test('
                            'id bigint unsigned,'
                            'msg varchar(64),'
                            'keyspace_id bigint(20) unsigned not null,'
                            'primary key (id),'
                            'index by_msg (msg)'
                            ') Engine=InnoDB')

        if create_table:
            utils.run_vtctl(
                ['ApplySchema', '-sql=' + create_table_sql, 'test_keyspace'],
                auto_log=True)
Example #60
def init_keyspace():
    """Creates a `test_keyspace` keyspace with a sharding key."""
    utils.run_vtctl([
        'CreateKeyspace', '-sharding_column_name', 'keyspace_id',
        '-sharding_column_type', KEYSPACE_ID_TYPE, 'test_keyspace'
    ])