def test_scrap_and_reinit(self):
    """Scraps and re-inits a replica, checking the replication graph stays sane.

    Also verifies that a manually-added bogus replication link is removed
    by ShardReplicationFix.
    """
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62044.create_db('vt_test_keyspace')

    # one master one replica
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('replica', 'test_keyspace', '0')

    # make sure the replica is in the replication graph
    before_scrap = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
                                         'test_keyspace/0'])
    self.assertEqual(1, len(before_scrap['ReplicationLinks']),
                     'wrong replication links before: %s' % str(before_scrap))

    # scrap and re-init
    utils.run_vtctl(['ScrapTablet', '-force', tablet_62044.tablet_alias])
    tablet_62044.init_tablet('replica', 'test_keyspace', '0')

    after_scrap = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
                                        'test_keyspace/0'])
    self.assertEqual(1, len(after_scrap['ReplicationLinks']),
                     'wrong replication links after: %s' % str(after_scrap))

    # manually add a bogus entry to the replication graph, and check
    # it is removed by ShardReplicationFix
    utils.run_vtctl(['ShardReplicationAdd', 'test_keyspace/0',
                     'test_nj-0000066666', 'test_nj-0000062344'],
                    auto_log=True)
    with_bogus = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
                                       'test_keyspace/0'])
    self.assertEqual(2, len(with_bogus['ReplicationLinks']),
                     'wrong replication links with bogus: %s' % str(with_bogus))
    utils.run_vtctl(['ShardReplicationFix', 'test_nj', 'test_keyspace/0'],
                    auto_log=True)
    after_fix = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
                                      'test_keyspace/0'])
    # BUG FIX: the original asserted on len(after_scrap) here, so the
    # post-ShardReplicationFix state was never actually checked.
    self.assertEqual(1, len(after_fix['ReplicationLinks']),
                     'wrong replication links after fix: %s' % str(after_fix))
def _test_reparent_from_outside_check(self, brutal, base_time):
    """Checks topology, status page and health stream after external reparent."""
    # make sure the shard replication graph is fine
    shard_replication = utils.run_vtctl_json(
        ["GetShardReplication", "test_nj", "test_keyspace/0"])
    hashed_nodes = {}
    for node in shard_replication["nodes"]:
        alias = node["tablet_alias"]
        hashed_nodes[alias["cell"] + "-" + str(alias["uid"])] = True
    logging.debug("Got shard replication nodes: %s", str(hashed_nodes))
    expected_nodes = {"test_nj-41983": True, "test_nj-62044": True}
    if not brutal:
        expected_nodes["test_nj-62344"] = True
    self.assertEqual(
        expected_nodes, hashed_nodes,
        "Got unexpected nodes: %s != %s"
        % (str(expected_nodes), str(hashed_nodes)))

    # make sure the master status page says it's the master
    tablet_62044_master_status = tablet_62044.get_status()
    self.assertIn("Serving graph: test_keyspace 0 master",
                  tablet_62044_master_status)

    # make sure the master health stream says it's the master too
    # (health check is disabled on these servers, force it first)
    utils.run_vtctl(["RunHealthCheck", tablet_62044.tablet_alias, "replica"])
    health = utils.run_vtctl_json(
        ["VtTabletStreamHealth", "-count", "1", tablet_62044.tablet_alias])
    self.assertEqual(health["target"]["tablet_type"], topodata_pb2.MASTER)
    # have to compare the int version, or the rounding errors can break
    self.assertTrue(
        health["tablet_externally_reparented_timestamp"] >= int(base_time))
def test_shard_replication_fix(self):
    """Verifies ShardReplicationFix removes a bogus replication graph entry."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62044.create_db('vt_test_keyspace')

    # one master one replica
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('replica', 'test_keyspace', '0')

    # make sure the replica is in the replication graph
    before_bogus = utils.run_vtctl_json(
        ['GetShardReplication', 'test_nj', 'test_keyspace/0'])
    self.assertEqual(
        2, len(before_bogus['nodes']),
        'wrong shard replication nodes before: %s' % str(before_bogus))

    # manually add a bogus entry to the replication graph, and check
    # it is removed by ShardReplicationFix
    utils.run_vtctl(
        ['ShardReplicationAdd', 'test_keyspace/0', 'test_nj-0000066666'],
        auto_log=True)
    with_bogus = utils.run_vtctl_json(
        ['GetShardReplication', 'test_nj', 'test_keyspace/0'])
    self.assertEqual(
        3, len(with_bogus['nodes']),
        'wrong shard replication nodes with bogus: %s' % str(with_bogus))
    utils.run_vtctl(
        ['ShardReplicationFix', 'test_nj', 'test_keyspace/0'], auto_log=True)
    after_fix = utils.run_vtctl_json(
        ['GetShardReplication', 'test_nj', 'test_keyspace/0'])
    self.assertEqual(
        2, len(after_fix['nodes']),
        'wrong shard replication nodes after fix: %s' % str(after_fix))
def _test_sanity(self):
    """Smoke-tests a master tablet: queries, pings and basic vtctl actions."""
    # Start up a master mysql and vttablet
    utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
    utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
    tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
    utils.validate_topology()
    srv_shard = utils.run_vtctl_json(['GetSrvShard', 'test_nj',
                                      'test_keyspace/0'])
    self.assertEqual(srv_shard['MasterCell'], 'test_nj')

    # if these statements don't run before the tablet it will wedge
    # waiting for the db to become accessible. this is more a bug than
    # a feature.
    tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
                          self._populate_vt_select_test)

    tablet_62344.start_vttablet()

    # make sure the query service is started right away
    result, _ = utils.run_vtctl(['Query', 'test_nj', 'test_keyspace',
                                 'select * from vt_select_test'],
                                mode=utils.VTCTL_VTCTL, trap_output=True)
    rows = result.splitlines()
    self.assertEqual(len(rows), 5,
                     "expected 5 rows in vt_select_test: %s %s"
                     % (str(rows), result))

    # make sure direct dba queries work
    query_result = utils.run_vtctl_json(
        ['ExecuteFetch', '-want_fields', tablet_62344.tablet_alias,
         'select * from vt_test_keyspace.vt_select_test'])
    self.assertEqual(len(query_result['Rows']), 4,
                     "expected 4 rows in vt_select_test: %s"
                     % str(query_result))
    self.assertEqual(len(query_result['Fields']), 2,
                     "expected 2 fields in vt_select_test: %s"
                     % str(query_result))

    # check Pings
    utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
    utils.run_vtctl(['RpcPing', tablet_62344.tablet_alias])

    # Quickly check basic actions.
    utils.run_vtctl(['SetReadOnly', tablet_62344.tablet_alias])
    utils.wait_db_read_only(62344)

    utils.run_vtctl(['SetReadWrite', tablet_62344.tablet_alias])
    utils.check_db_read_write(62344)

    utils.run_vtctl(['DemoteMaster', tablet_62344.tablet_alias])
    utils.wait_db_read_only(62344)

    utils.validate_topology()
    utils.run_vtctl(['ValidateKeyspace', 'test_keyspace'])
    # not pinging tablets, as it enables replication checks, and they
    # break because we only have a single master, no slaves
    utils.run_vtctl(['ValidateShard', '-ping-tablets=false',
                     'test_keyspace/0'])
    srv_shard = utils.run_vtctl_json(['GetSrvShard', 'test_nj',
                                      'test_keyspace/0'])
    self.assertEqual(srv_shard['MasterCell'], 'test_nj')

    tablet_62344.kill_vttablet()

    tablet_62344.init_tablet('idle')
    tablet_62344.scrap(force=True)
def _test_reparent_from_outside_check(self, brutal, base_time):
    """Validates topology, status page and health stream after a reparent."""
    # make sure the shard replication graph is fine
    shard_replication = utils.run_vtctl_json(['GetShardReplication',
                                              'test_nj', 'test_keyspace/0'])
    hashed_nodes = {}
    for node in shard_replication['nodes']:
        alias = node['tablet_alias']
        hashed_nodes[alias['cell'] + '-' + str(alias['uid'])] = True
    logging.debug('Got shard replication nodes: %s', str(hashed_nodes))
    expected_nodes = {
        'test_nj-41983': True,
        'test_nj-62044': True,
    }
    if not brutal:
        expected_nodes['test_nj-62344'] = True
    self.assertEqual(expected_nodes, hashed_nodes,
                     'Got unexpected nodes: %s != %s'
                     % (str(expected_nodes), str(hashed_nodes)))

    # make sure the master status page says it's the master
    tablet_62044_master_status = tablet_62044.get_status()
    self.assertIn('Serving graph: test_keyspace 0 master',
                  tablet_62044_master_status)

    # make sure the master health stream says it's the master too
    # (health check is disabled on these servers, force it first)
    utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'])
    health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1',
                                   tablet_62044.tablet_alias])
    self.assertEqual(health['target']['tablet_type'], topodata_pb2.MASTER)
    # have to compare the int version, or the rounding errors can break
    self.assertTrue(
        health['tablet_externally_reparented_timestamp'] >= int(base_time))
def test_vtctl_copyschemashard_different_dbs_should_fail(self):
    """CopySchemaShard must fail when source and dest db charsets differ."""
    # Apply initial schema to the whole keyspace before creating shard 2.
    self._apply_initial_schema()

    _setup_shard_2()
    try:
        # InitShardMaster creates the db, but there shouldn't be any tables yet.
        self._check_tables(shard_2_master, 0)
        self._check_tables(shard_2_replica1, 0)

        # Change the db charset on the destination shard from utf8 to latin1.
        # This will make CopySchemaShard fail during its final diff.
        # (The different charset won't be corrected on the destination shard
        # because we use "CREATE DATABASE IF NOT EXISTS" and this doesn't fail if
        # there are differences in the options e.g. the character set.)
        shard_2_schema = self._get_schema(shard_2_master.tablet_alias)
        self.assertIn('utf8', shard_2_schema['database_schema'])
        utils.run_vtctl_json(
            ['ExecuteFetchAsDba', '-json', shard_2_master.tablet_alias,
             'ALTER DATABASE vt_test_keyspace CHARACTER SET latin1'])

        _, stderr = utils.run_vtctl(
            ['CopySchemaShard', 'test_keyspace/0', 'test_keyspace/2'],
            expect_fail=True, auto_log=True)
        self.assertIn(
            'source and dest don\'t agree on database creation command',
            stderr)

        # shard_2_master should have the same number of tables. Only the db
        # character set is different.
        self._check_tables(shard_2_master, 4)
    finally:
        _teardown_shard_2()
def test_master_restart_sets_ter_timestamp(self):
    """Test that TER timestamp is set when we restart the MASTER vttablet.

    TER = TabletExternallyReparented.
    See StreamHealthResponse.tablet_externally_reparented_timestamp for
    details.
    """
    master, replica = tablet_62344, tablet_62044
    tablets = [master, replica]

    # Start vttablets. Our future master is initially a REPLICA.
    for t in tablets:
        t.create_db('vt_test_keyspace')
    for t in tablets:
        t.start_vttablet(wait_for_state='NOT_SERVING',
                         init_tablet_type='replica',
                         init_keyspace='test_keyspace',
                         init_shard='0')

    # Initialize tablet as MASTER.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     master.tablet_alias])
    master.wait_for_vttablet_state('SERVING')

    # Capture the current TER.
    health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1',
                                   master.tablet_alias])
    self.assertEqual(topodata_pb2.MASTER, health['target']['tablet_type'])
    self.assertIn('tablet_externally_reparented_timestamp', health)
    self.assertGreater(health['tablet_externally_reparented_timestamp'], 0,
                       'TER on MASTER must be set after InitShardMaster')

    # Restart the MASTER vttablet.
    master.kill_vttablet()
    master.start_vttablet(wait_for_state='SERVING',
                          init_tablet_type='replica',
                          init_keyspace='test_keyspace',
                          init_shard='0')

    # Make sure that the TER increased i.e. it was set to the current time.
    health_after_restart = utils.run_vtctl_json(
        ['VtTabletStreamHealth', '-count', '1', master.tablet_alias])
    self.assertEqual(topodata_pb2.MASTER,
                     health_after_restart['target']['tablet_type'])
    self.assertIn('tablet_externally_reparented_timestamp',
                  health_after_restart)
    self.assertGreater(
        health_after_restart['tablet_externally_reparented_timestamp'],
        health['tablet_externally_reparented_timestamp'],
        'When the MASTER vttablet was restarted, the TER timestamp must be set'
        ' to the current time.')

    # Shutdown.
    for t in tablets:
        t.kill_vttablet()
def test_reparent_cross_cell(self, shard_id="0"):
    """Reparents gracefully to a master in another cell and checks topology."""
    utils.run_vtctl(["CreateKeyspace", "test_keyspace"])

    # create the database so vttablets start, as they are serving
    for t in (tablet_62344, tablet_62044, tablet_41983, tablet_31981):
        t.create_db("vt_test_keyspace")

    # Start up a master mysql and vttablet
    tablet_62344.init_tablet("master", "test_keyspace", shard_id, start=True,
                             wait_for_start=False)
    shard = utils.run_vtctl_json(["GetShard", "test_keyspace/" + shard_id])
    self.assertEqual(shard["cells"], ["test_nj"],
                     "wrong list of cell in Shard: %s" % str(shard["cells"]))

    # Create a few slaves for testing reparenting.
    tablet_62044.init_tablet("replica", "test_keyspace", shard_id, start=True,
                             wait_for_start=False)
    tablet_41983.init_tablet("replica", "test_keyspace", shard_id, start=True,
                             wait_for_start=False)
    tablet_31981.init_tablet("replica", "test_keyspace", shard_id, start=True,
                             wait_for_start=False)
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
        t.wait_for_vttablet_state("SERVING")
    shard = utils.run_vtctl_json(["GetShard", "test_keyspace/" + shard_id])
    self.assertEqual(shard["cells"], ["test_nj", "test_ny"],
                     "wrong list of cell in Shard: %s" % str(shard["cells"]))

    # Recompute the shard layout node - until you do that, it might not be
    # valid.
    utils.run_vtctl(["RebuildShardGraph", "test_keyspace/" + shard_id])
    utils.validate_topology()

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
        t.reset_replication()
    utils.run_vtctl(["InitShardMaster", "test_keyspace/" + shard_id,
                     tablet_62344.tablet_alias], auto_log=True)
    utils.validate_topology(ping_tablets=True)

    self._check_db_addr(shard_id, "master", tablet_62344.port)

    # Verify MasterCell is properly set
    self._check_master_cell("test_nj", shard_id, "test_nj")
    self._check_master_cell("test_ny", shard_id, "test_nj")

    # Perform a graceful reparent operation to another cell.
    utils.pause("test_reparent_cross_cell PlannedReparentShard")
    utils.run_vtctl(["PlannedReparentShard", "test_keyspace/" + shard_id,
                     tablet_31981.tablet_alias], auto_log=True)
    utils.validate_topology()

    self._check_db_addr(shard_id, "master", tablet_31981.port,
                        cell="test_ny")

    # Verify MasterCell is set to new cell.
    self._check_master_cell("test_nj", shard_id, "test_ny")
    self._check_master_cell("test_ny", shard_id, "test_ny")

    tablet.kill_tablets(
        [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
def test_reparent_cross_cell(self, shard_id='0'):
    """Reparents gracefully to a master in another cell and checks topology."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    for t in (tablet_62344, tablet_62044, tablet_41983, tablet_31981):
        t.create_db('vt_test_keyspace')

    # Start up a master mysql and vttablet
    tablet_62344.init_tablet('master', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
    self.assertEqual(shard['cells'], ['test_nj'],
                     'wrong list of cell in Shard: %s' % str(shard['cells']))

    # Create a few slaves for testing reparenting. Won't be healthy
    # as replication is not running.
    tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    tablet_62344.wait_for_vttablet_state('SERVING')
    for t in [tablet_62044, tablet_41983, tablet_31981]:
        t.wait_for_vttablet_state('NOT_SERVING')
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
    self.assertEqual(shard['cells'], ['test_nj', 'test_ny'],
                     'wrong list of cell in Shard: %s' % str(shard['cells']))

    utils.validate_topology()

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
        t.reset_replication()
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/' + shard_id,
                     tablet_62344.tablet_alias], auto_log=True)
    utils.validate_topology(ping_tablets=True)

    self._check_master_tablet(tablet_62344)

    # Perform a graceful reparent operation to another cell.
    utils.pause('test_reparent_cross_cell PlannedReparentShard')
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/' + shard_id,
                     tablet_31981.tablet_alias], auto_log=True)
    utils.validate_topology()

    self._check_master_tablet(tablet_31981)

    tablet.kill_tablets(
        [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
def _check_master_tablet(self, t, port=None):
    """Makes sure the tablet type is master, and its health check agrees."""
    tablet_info = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
    self.assertEqual(tablet_info['type'], topodata_pb2.MASTER)
    if port:
        self.assertEqual(tablet_info['port_map']['vt'], port)

    # make sure the health stream is updated
    stream_health = utils.run_vtctl_json(
        ['VtTabletStreamHealth', '-count', '1', t.tablet_alias])
    self.assertIn('serving', stream_health)
    self.assertEqual(stream_health['target']['tablet_type'],
                     topodata_pb2.MASTER)
def test_health_check(self):
    """Checks a spare promotes to replica and replication lag is reported."""
    utils.run_vtctl('CreateKeyspace test_keyspace')

    # one master, one replica that starts in spare
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('spare', 'test_keyspace', '0')

    for t in (tablet_62344, tablet_62044):
        t.create_db('vt_test_keyspace')

    tablet_62344.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica')
    tablet_62044.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica')

    tablet_62344.wait_for_vttablet_state('SERVING')
    tablet_62044.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/0',
                     tablet_62344.tablet_alias])

    # make sure the 'spare' slave goes to 'replica'
    timeout = 10
    while True:
        ti = utils.run_vtctl_json(['GetTablet', tablet_62044.tablet_alias])
        if ti['Type'] == "replica":
            logging.info("Slave tablet went to replica, good")
            break
        timeout = utils.wait_step('slave tablet going to replica', timeout)

    # make sure the master is still master
    ti = utils.run_vtctl_json(['GetTablet', tablet_62344.tablet_alias])
    self.assertEqual(ti['Type'], 'master',
                     "unexpected master type: %s" % ti['Type'])

    # stop replication on the slave, see it trigger the slave going
    # slightly unhealthy
    tablet_62044.mquery('', 'stop slave')
    timeout = 10
    while True:
        ti = utils.run_vtctl_json(['GetTablet', tablet_62044.tablet_alias])
        health = ti.get('Health') or {}
        if health.get('replication_lag') == 'high':
            logging.info("Slave tablet replication_lag went to high, good")
            break
        timeout = utils.wait_step('slave has high replication lag', timeout)

    # make sure the serving graph was updated
    ep = utils.run_vtctl_json(['GetEndPoints', 'test_nj', 'test_keyspace/0',
                               'replica'])
    if not ep['entries'][0]['health']:
        self.fail('Replication lag parameter not propagated to serving graph: %s'
                  % str(ep))
    self.assertEqual(
        ep['entries'][0]['health']['replication_lag'], 'high',
        'Replication lag parameter not propagated to serving graph: %s'
        % str(ep))

    tablet.kill_tablets([tablet_62344, tablet_62044])
def test_scrap(self):
    """Scraps a replica and checks the serving graph stays consistent."""
    # Start up a master mysql and vttablet
    utils.run_vtctl(["CreateKeyspace", "test_keyspace"])

    tablet_62344.init_tablet("master", "test_keyspace", "0")
    tablet_62044.init_tablet("replica", "test_keyspace", "0")
    utils.run_vtctl(["RebuildShardGraph", "test_keyspace/*"])
    utils.validate_topology()
    srv_shard = utils.run_vtctl_json(["GetSrvShard", "test_nj",
                                      "test_keyspace/0"])
    self.assertEqual(srv_shard["MasterCell"], "test_nj")

    tablet_62044.scrap(force=True)
    utils.validate_topology()
    srv_shard = utils.run_vtctl_json(["GetSrvShard", "test_nj",
                                      "test_keyspace/0"])
    self.assertEqual(srv_shard["MasterCell"], "test_nj")
def test_scrap(self):
    """Scraps a replica and checks the serving graph stays consistent."""
    # Start up a master mysql and vttablet
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('replica', 'test_keyspace', '0')
    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/*'])
    utils.validate_topology()
    srv_shard = utils.run_vtctl_json(['GetSrvShard', 'test_nj',
                                      'test_keyspace/0'])
    self.assertEqual(srv_shard['MasterCell'], 'test_nj')

    tablet_62044.scrap(force=True)
    utils.validate_topology()
    srv_shard = utils.run_vtctl_json(['GetSrvShard', 'test_nj',
                                      'test_keyspace/0'])
    self.assertEqual(srv_shard['MasterCell'], 'test_nj')
def test_health_check_uid_collision(self):
    """Checks an old tablet can't clobber a record claimed by a newer one.

    If two tablets are running with the same UID, we should prevent the
    healthcheck on the older one from modifying the tablet record after the
    record has been claimed by a newer instance.
    """
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    for t in tablet_62344, tablet_62044:
        t.create_db('vt_test_keyspace')

    # Before starting tablets, simulate another tablet
    # owning the replica's record.
    utils.run_vtctl(['InitTablet', '-allow_update', '-hostname', 'localhost',
                     '-keyspace', 'test_keyspace', '-shard', '0', '-port',
                     '0', '-parent', tablet_62044.tablet_alias, 'replica'])

    # Set up tablets.
    tablet_62344.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica')
    tablet_62044.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica',
                                init_keyspace='test_keyspace',
                                init_shard='0')
    tablet_62344.wait_for_vttablet_state('SERVING')
    tablet_62044.wait_for_vttablet_state('NOT_SERVING')
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/0',
                     tablet_62344.tablet_alias])
    tablet_62044.wait_for_vttablet_state('SERVING')

    # Check that the tablet owns the record.
    tablet_record = utils.run_vtctl_json(['GetTablet',
                                          tablet_62044.tablet_alias])
    # FIX: use assertEqual; assertEquals is a deprecated unittest alias.
    self.assertEqual(tablet_record['port_map']['vt'], tablet_62044.port,
                     "tablet didn't take over the record")

    # Take away ownership again.
    utils.run_vtctl(['InitTablet', '-allow_update', '-hostname', 'localhost',
                     '-keyspace', 'test_keyspace', '-shard', '0', '-port',
                     '0', '-parent', tablet_62044.tablet_alias, 'replica'])

    # Tell the tablets to shutdown gracefully,
    # which normally includes going SPARE.
    tablet.kill_tablets([tablet_62344, tablet_62044])

    # Make sure the tablet record hasn't been touched.
    tablet_record = utils.run_vtctl_json(['GetTablet',
                                          tablet_62044.tablet_alias])
    self.assertEqual(tablet_record['type'],
                     tablet_62044.tablet_type_value['REPLICA'],
                     'tablet changed record without owning it')
def test_charset(self):
    """Checks latin1 data survives filtered replication without corruption."""
    start_position = mysql_flavor().master_position(dst_replica)
    logging.debug('test_charset: starting @ %s', start_position)

    # Insert something that will replicate incorrectly if the charset is not
    # propagated through binlog streamer to the destination.
    #
    # Vitess tablets default to using utf8, so we insert something crazy and
    # pretend it's latin1. If the binlog player doesn't also pretend it's
    # latin1, it will be inserted as utf8, which will change its value.
    src_master.mquery(
        'vt_test_keyspace',
        "INSERT INTO test_table (id, keyspace_id, msg) "
        "VALUES (41523, 1, 'Šṛ́rỏé') /* vtgate:: keyspace_id:00000001 */",
        conn_params={'charset': 'latin1'}, write=True)

    # Wait for it to replicate.
    event = utils.run_vtctl_json(['VtTabletUpdateStream',
                                  '-position', start_position,
                                  '-count', '1',
                                  dst_replica.tablet_alias])
    self.assertIn('event_token', event)
    self.assertIn('timestamp', event['event_token'])

    # Check the value.
    data = dst_master.mquery(
        'vt_test_keyspace',
        'SELECT id, keyspace_id, msg FROM test_table WHERE id=41523 LIMIT 1')
    self.assertEqual(len(data), 1, 'No data replicated.')
    self.assertEqual(len(data[0]), 3, 'Wrong number of columns.')
    self.assertEqual(data[0][2], 'Šṛ́rỏé',
                     'Data corrupted due to wrong charset.')
def check_stream_health_equals_binlog_player_vars(self, tablet_obj, count):
    """Checks the variables exported by streaming health check match vars.

    Args:
      tablet_obj: the tablet to check.
      count: number of binlog players to expect.
    """
    blp_stats = utils.get_vars(tablet_obj.port)
    self.assertEqual(blp_stats['BinlogPlayerMapSize'], count)

    # Enforce health check because it's not running by default as
    # tablets may not be started with it, or may not run it in time.
    utils.run_vtctl(['RunHealthCheck', tablet_obj.tablet_alias])
    stream_health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count',
                                          '1', tablet_obj.tablet_alias])
    logging.debug('Got health: %s', str(stream_health))
    self.assertNotIn('serving', stream_health)
    self.assertIn('realtime_stats', stream_health)
    realtime_stats = stream_health['realtime_stats']
    self.assertNotIn('health_error', realtime_stats)
    self.assertIn('binlog_players_count', realtime_stats)
    self.assertEqual(blp_stats['BinlogPlayerMapSize'],
                     realtime_stats['binlog_players_count'])
    self.assertEqual(
        blp_stats['BinlogPlayerSecondsBehindMaster'],
        realtime_stats.get('seconds_behind_master_filtered_replication', 0))
def _test_reparent_from_outside_check(self, brutal):
    """Checks replication graph and status page (zookeeper topology only)."""
    if environment.topo_server().flavor() != 'zookeeper':
        return

    # make sure the shard replication graph is fine
    shard_replication = utils.run_vtctl_json(['GetShardReplication',
                                              'test_nj', 'test_keyspace/0'])
    hashed_nodes = {}
    for node in shard_replication['nodes']:
        alias = node['tablet_alias']
        hashed_nodes[alias['cell'] + '-' + str(alias['uid'])] = True
    logging.debug('Got shard replication nodes: %s', str(hashed_nodes))
    expected_nodes = {
        'test_nj-41983': True,
        'test_nj-62044': True,
    }
    if not brutal:
        expected_nodes['test_nj-62344'] = True
    self.assertEqual(expected_nodes, hashed_nodes,
                     'Got unexpected nodes: %s != %s'
                     % (str(expected_nodes), str(hashed_nodes)))

    tablet_62044_master_status = tablet_62044.get_status()
    self.assertIn('Serving graph: test_keyspace 0 master',
                  tablet_62044_master_status)
def execute(self, sql, bindvars=None, transaction_id=None,
            execute_options=None, auto_log=True):
    """execute uses 'vtctl VtTabletExecute' to execute a command.

    Args:
      sql: the command to execute.
      bindvars: a dict of bind variables.
      transaction_id: the id of the transaction to use if necessary.
      execute_options: proto-encoded ExecuteOptions object.
      auto_log: passed to run_vtctl.

    Returns:
      the result of running vtctl command.
    """
    args = ['VtTabletExecute', '-json']
    if bindvars:
        args += ['-bind_variables', json.dumps(bindvars)]
    if transaction_id:
        args += ['-transaction_id', str(transaction_id)]
    if execute_options:
        args += ['-options', execute_options]
    args += [self.tablet_alias, sql]
    return utils.run_vtctl_json(args, auto_log=auto_log)
def _verify_vtctl_set_shard_tablet_control(self):
    """Test that manually editing the blacklisted tables works correctly.

    TODO(mberlin): This is more an integration test and should be moved to the
    Go codebase eventually.
    """
    # check 'vtctl SetShardTabletControl' command works as expected:
    # clear the rdonly entry:
    utils.run_vtctl(['SetShardTabletControl', '--remove', 'source_keyspace/0',
                     'rdonly'], auto_log=True)
    self._assert_tablet_controls([topodata_pb2.MASTER, topodata_pb2.REPLICA])

    # re-add rdonly:
    utils.run_vtctl(['SetShardTabletControl', '--tables=moving.*,view1',
                     'source_keyspace/0', 'rdonly'], auto_log=True)
    self._assert_tablet_controls([topodata_pb2.MASTER, topodata_pb2.REPLICA,
                                  topodata_pb2.RDONLY])

    # and then clear all entries:
    for tablet_type in ('rdonly', 'replica', 'master'):
        utils.run_vtctl(['SetShardTabletControl', '--remove',
                         'source_keyspace/0', tablet_type], auto_log=True)
    shard_json = utils.run_vtctl_json(['GetShard', 'source_keyspace/0'])
    self.assertNotIn('tablet_controls', shard_json)
def _check_srv_keyspace(self, expected):
    """Compares the ServedFrom entries of the SrvKeyspace to `expected`."""
    cell = 'test_nj'
    keyspace = 'destination_keyspace'
    ks = utils.run_vtctl_json(['GetSrvKeyspace', cell, keyspace])
    result = ''
    if 'served_from' in ks and ks['served_from']:
        lines = []
        for served_from in sorted(ks['served_from']):
            tt = topodata_pb2.TabletType.Name(
                served_from['tablet_type']).lower()
            if tt == 'batch':
                tt = 'rdonly'
            lines.append('ServedFrom(%s): %s\n'
                         % (tt, served_from['keyspace']))
        result = ''.join(sorted(lines))
    logging.debug('Cell %s keyspace %s has data:\n%s', cell, keyspace, result)
    self.assertEqual(
        expected, result,
        'Mismatch in srv keyspace for cell %s keyspace %s, expected:\n'
        '%s\ngot:\n%s' % (cell, keyspace, expected, result))
    self.assertNotIn('sharding_column_name', ks,
                     'Got a sharding_column_name in SrvKeyspace: %s' % str(ks))
    self.assertNotIn('sharding_column_type', ks,
                     'Got a sharding_column_type in SrvKeyspace: %s' % str(ks))
def _check_blacklisted_tables(self, tablet, expected):
    """Asserts the tablet record's BlacklistedTables match `expected`.

    Args:
      tablet: the tablet fixture whose record to inspect.
      expected: the expected BlacklistedTables value.
    """
    ti = utils.run_vtctl_json(['GetTablet', tablet.tablet_alias])
    # FIX: corrected the "balcklisted" typo in the debug log message.
    logging.debug("Tablet %s has blacklisted tables: %s", tablet.tablet_alias,
                  ti['BlacklistedTables'])
    self.assertEqual(ti['BlacklistedTables'], expected,
                     "Got unexpected BlacklistedTables: %s (expecting %s)" % (
                         ti['BlacklistedTables'], expected))
def test_no_mysql_healthcheck(self):
    """This test starts a vttablet with no mysql port, while mysql is down.

    It makes sure vttablet will start properly and be unhealthy.
    Then we start mysql, and make sure vttablet becomes healthy.
    """
    # we need replication to be enabled, so the slave tablet can be healthy.
    for t in (tablet_62344, tablet_62044):
        t.create_db("vt_test_keyspace")
    pos = mysql_flavor().master_position(tablet_62344)
    change_master_cmds = mysql_flavor().change_master_commands(
        utils.hostname, tablet_62344.mysql_port, pos)
    tablet_62044.mquery("",
                        ["RESET MASTER", "RESET SLAVE"] +
                        change_master_cmds +
                        ["START SLAVE"])

    # now shutdown all mysqld
    shutdown_procs = [tablet_62344.shutdown_mysql(),
                      tablet_62044.shutdown_mysql()]
    utils.wait_procs(shutdown_procs)

    # start the tablets, wait for them to be NOT_SERVING (mysqld not there)
    tablet_62344.init_tablet("master", "test_keyspace", "0")
    tablet_62044.init_tablet("spare", "test_keyspace", "0",
                             include_mysql_port=False)
    for t in (tablet_62344, tablet_62044):
        t.start_vttablet(wait_for_state=None,
                         target_tablet_type="replica",
                         full_mycnf_args=True,
                         include_mysql_port=False)
    for t in (tablet_62344, tablet_62044):
        t.wait_for_vttablet_state("NOT_SERVING")
        self.check_healthz(t, False)

    # restart mysqld
    start_procs = [tablet_62344.start_mysql(), tablet_62044.start_mysql()]
    utils.wait_procs(start_procs)

    # the master should still be healthy
    utils.run_vtctl(["RunHealthCheck", tablet_62344.tablet_alias, "replica"],
                    auto_log=True)
    self.check_healthz(tablet_62344, True)

    # the slave won't be healthy at first, as replication is not running
    utils.run_vtctl(["RunHealthCheck", tablet_62044.tablet_alias, "replica"],
                    auto_log=True)
    self.check_healthz(tablet_62044, False)
    tablet_62044.wait_for_vttablet_state("NOT_SERVING")

    # restart replication
    tablet_62044.mquery("", ["START SLAVE"])

    # wait for the tablet to become healthy and fix its mysql port
    utils.run_vtctl(["RunHealthCheck", tablet_62044.tablet_alias, "replica"],
                    auto_log=True)
    tablet_62044.wait_for_vttablet_state("SERVING")
    self.check_healthz(tablet_62044, True)

    for t in (tablet_62344, tablet_62044):
        # wait for mysql port to show up
        timeout = 10
        while True:
            ti = utils.run_vtctl_json(["GetTablet", t.tablet_alias])
            if "mysql" in ti["Portmap"]:
                break
            timeout = utils.wait_step("mysql port in tablet record", timeout)
        self.assertEqual(ti["Portmap"]["mysql"], t.mysql_port)

    # all done
    tablet.kill_tablets([tablet_62344, tablet_62044])
def test_vtaction_dies_hard(self):
    """SIGKILLs a running vtaction and checks the next action still works."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as it is serving
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

    # start a 'vtctl Sleep' command, don't wait for it
    action_path, _ = utils.run_vtctl(['-no-wait', 'Sleep',
                                      tablet_62344.tablet_alias, '60s'],
                                     trap_output=True)
    action_path = action_path.strip()

    # wait for the action to be 'Running', capture its pid
    timeout = 10
    while True:
        an = utils.run_vtctl_json(['ReadTabletAction', action_path])
        if an.get('State', None) == 'Running':
            pid = an['Pid']
            logging.info("Action is running with pid %u, good", pid)
            break
        timeout = utils.wait_step('sleep action to run', timeout)

    # let's kill it hard, wait until it's gone for good
    os.kill(pid, signal.SIGKILL)
    try:
        os.waitpid(pid, 0)
    except OSError:
        # this means the process doesn't exist any more, we're good
        pass

    # Then let's make sure the next action cleans up properly and can execute.
    # If that doesn't work, this will time out and the test will fail.
    utils.run_vtctl(['Ping', tablet_62344.tablet_alias])

    tablet_62344.kill_vttablet()
def test_sigterm(self):
    """SIGTERMs a running vtaction and checks the remote error is reported."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as it is serving
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)

    # start a 'vtctl Sleep' command, don't wait for it
    action_path, _ = utils.run_vtctl(['-no-wait', 'Sleep',
                                      tablet_62344.tablet_alias, '60s'],
                                     trap_output=True)
    action_path = action_path.strip()

    # wait for the action to be 'Running', capture its pid
    timeout = 10
    while True:
        an = utils.run_vtctl_json(['ReadTabletAction', action_path])
        if an.get('State', None) == 'Running':
            pid = an['Pid']
            logging.info("Action is running with pid %u, good", pid)
            break
        timeout = utils.wait_step('sleep action to run', timeout)

    # let's kill the vtaction process with a regular SIGTERM
    os.kill(pid, signal.SIGTERM)

    # check the vtctl command got the right remote error back
    out, err = utils.run_vtctl(['WaitForAction', action_path],
                               trap_output=True, raise_on_error=False)
    if "vtaction interrupted by signal" not in err:
        self.fail("cannot find expected output in error: " + err)
    logging.debug("vtaction was interrupted correctly:\n" + err)

    tablet_62344.kill_vttablet()
def test_actions_and_timeouts(self):
  """Run a long 'Sleep' action and verify a concurrent RpcPing times out.

  When running against the zookeeper topo server, also spot-checks the
  exported vars: zk connection states, time spent connected, TabletType.

  Bug fixed: self.fail() takes a single message argument; the original
  passed the value as a second positional argument, which raises
  TypeError instead of failing with a useful message.
  """
  # Start up a master mysql and vttablet
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
  utils.validate_topology()
  srvShard = utils.run_vtctl_json(['GetSrvShard', 'test_nj',
                                   'test_keyspace/0'])
  self.assertEqual(srvShard['MasterCell'], 'test_nj')
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.start_vttablet()
  utils.run_vtctl(['RpcPing', tablet_62344.tablet_alias])

  # schedule long action in the background, sleep a little bit to make sure
  # it started to run
  args = (environment.binary_args('vtctl') +
          environment.topo_server_flags() +
          environment.tablet_manager_protocol_flags() +
          environment.tabletconn_protocol_flags() +
          ['-log_dir', environment.vtlogroot,
           'Sleep', tablet_62344.tablet_alias, '10s'])
  bg = utils.run_bg(args)
  time.sleep(3)

  # try a frontend RpcPing that should timeout as the tablet is busy
  # running the other one
  stdout, stderr = utils.run_vtctl(['-wait-time', '3s', 'RpcPing',
                                    tablet_62344.tablet_alias],
                                   expect_fail=True)
  if 'Timeout waiting for' not in stderr:
    self.fail("didn't find the right error strings in failed RpcPing: " +
              stderr)

  # wait for the background vtctl
  bg.wait()

  if environment.topo_server_implementation == 'zookeeper':
    # extra small test: we ran for a while, get the states we were in,
    # make sure they're accounted for properly
    # first the query engine States
    v = utils.get_vars(tablet_62344.port)
    # lazy %-args: only formatted if debug logging is enabled
    logging.debug("vars: %s", str(v))

    # then the Zookeeper connections
    if v['ZkMetaConn']['test_nj']['Current'] != 'Connected':
      self.fail('invalid zk test_nj state: %s' %
                v['ZkMetaConn']['test_nj']['Current'])
    if v['ZkMetaConn']['global']['Current'] != 'Connected':
      self.fail('invalid zk global state: %s' %
                v['ZkMetaConn']['global']['Current'])
    if v['ZkMetaConn']['test_nj']['DurationConnected'] < 10e9:
      # bug fix: format the message with %, don't pass a second arg to fail()
      self.fail('not enough time in Connected state: %u' %
                v['ZkMetaConn']['test_nj']['DurationConnected'])
    if v['TabletType'] != 'master':
      self.fail('TabletType not exported correctly')

  tablet_62344.kill_vttablet()
def test_restart_during_action(self):
  """Restart the tablet agent while a vtaction is still running.

  Kills and restarts vttablet with a 'Sleep' action in flight, then
  verifies a short WaitForAction fails and a longer one eventually
  succeeds (so the zombie action cannot clobber later commands).

  Bug fixed: self.fail() takes a single message argument; the original
  passed the value as a second positional argument, which raises
  TypeError instead of failing with a useful message.
  """
  # Start up a master mysql and vttablet
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
  utils.validate_topology()
  srvShard = utils.run_vtctl_json(['GetSrvShard', 'test_nj',
                                   'test_keyspace/0'])
  self.assertEqual(srvShard['MasterCell'], 'test_nj')
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62344.start_vttablet()
  utils.run_vtctl(['Ping', tablet_62344.tablet_alias])

  # schedule long action
  utils.run_vtctl(['-no-wait', 'Sleep', tablet_62344.tablet_alias, '15s'],
                  stdout=utils.devnull)
  # ping blocks until the sleep finishes unless we have a schedule race
  action_path, _ = utils.run_vtctl(['-no-wait', 'Ping',
                                    tablet_62344.tablet_alias],
                                   trap_output=True)
  action_path = action_path.strip()

  # kill agent leaving vtaction running
  tablet_62344.kill_vttablet()

  # restart agent
  tablet_62344.start_vttablet()

  # we expect this action with a short wait time to fail. this isn't the best
  # and has some potential for flakiness.
  utils.run_vtctl(['-wait-time', '2s', 'WaitForAction', action_path],
                  expect_fail=True)

  # wait until the background sleep action is done, otherwise there will be
  # a leftover vtaction whose result may overwrite running actions
  # NOTE(alainjobart): Yes, I've seen it happen, it's a pain to debug:
  # the zombie Sleep clobbers the Clone command in the following tests
  utils.run_vtctl(['-wait-time', '20s', 'WaitForAction', action_path],
                  auto_log=True)

  if environment.topo_server_implementation == 'zookeeper':
    # extra small test: we ran for a while, get the states we were in,
    # make sure they're accounted for properly
    # first the query engine States
    v = utils.get_vars(tablet_62344.port)
    # lazy %-args: only formatted if debug logging is enabled
    logging.debug("vars: %s", str(v))

    # then the Zookeeper connections
    if v['ZkMetaConn']['test_nj']['Current'] != 'Connected':
      self.fail('invalid zk test_nj state: %s' %
                v['ZkMetaConn']['test_nj']['Current'])
    if v['ZkMetaConn']['global']['Current'] != 'Connected':
      self.fail('invalid zk global state: %s' %
                v['ZkMetaConn']['global']['Current'])
    if v['ZkMetaConn']['test_nj']['DurationConnected'] < 10e9:
      # bug fix: format the message with %, don't pass a second arg to fail()
      self.fail('not enough time in Connected state: %u' %
                v['ZkMetaConn']['test_nj']['DurationConnected'])
    if v['TabletType'] != 'master':
      self.fail('TabletType not exported correctly')

  tablet_62344.kill_vttablet()
def _check_db_addr(self, shard, db_type, expected_port, cell="test_nj"):
  """Check the serving graph has exactly one endpoint for this shard/type.

  The endpoint must expose the expected '_vtocc' port and live on a host
  whose name starts with this machine's hostname.
  """
  end_points = utils.run_vtctl_json(
      ["GetEndPoints", cell, "test_keyspace/" + shard, db_type])
  self.assertEqual(len(end_points["entries"]), 1,
                   "Wrong number of entries: %s" % str(end_points))
  entry = end_points["entries"][0]
  port = entry["named_port_map"]["_vtocc"]
  self.assertEqual(port, expected_port,
                   "Unexpected port: %u != %u from %s" %
                   (port, expected_port, str(end_points)))
  host = entry["host"]
  if not host.startswith(utils.hostname):
    self.fail("Invalid hostname %s was expecting something starting with %s" %
              (host, utils.hostname))
def wait_for_tablet_type_change(self, tablet_alias, expected_type):
  """Poll GetTablet until the tablet reports the expected type.

  Args:
    tablet_alias: alias string of the tablet to watch.
    expected_type: tablet type string to wait for (e.g. 'replica').

  utils.wait_step fails the test if the type never changes in time.
  """
  timeout = 10
  while True:
    ti = utils.run_vtctl_json(['GetTablet', tablet_alias])
    if ti['Type'] == expected_type:
      # fix: lazy %-args instead of eager string interpolation, so the
      # message is only formatted when debug logging is enabled
      logging.debug("Slave tablet went to %s, good", expected_type)
      break
    timeout = utils.wait_step('slave becomes ' + expected_type, timeout)
def _check_db_addr(self, shard, db_type, expected_port):
  """Assert the test_nj serving graph exposes a single endpoint for the
  given shard and tablet type, on the expected port and local host."""
  endpoints = utils.run_vtctl_json(
      ['GetEndPoints', 'test_nj', 'test_keyspace/' + shard, db_type])
  entries = endpoints['entries']
  self.assertEqual(len(entries), 1,
                   'Wrong number of entries: %s' % str(endpoints))
  actual_port = entries[0]['named_port_map']['_vtocc']
  self.assertEqual(actual_port, expected_port,
                   'Unexpected port: %u != %u from %s' %
                   (actual_port, expected_port, str(endpoints)))
  endpoint_host = entries[0]['host']
  if not endpoint_host.startswith(utils.hostname):
    self.fail('Invalid hostname %s was expecting something starting with %s' %
              (endpoint_host, utils.hostname))
def _test_sanity(self):
  """Basic single-master sanity pass.

  Brings up one master tablet, checks the query service answers right
  away, runs direct DBA queries, then exercises Ping/RefreshState,
  read-only/read-write toggles, DemoteMaster, and validation commands.
  """
  # Start up a master mysql and vttablet
  utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
  utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
  tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
  utils.run_vtctl(
      ['RebuildKeyspaceGraph', '-rebuild_srv_shards', 'test_keyspace'])
  utils.validate_topology()
  self._check_srv_shard()

  # if these statements don't run before the tablet it will wedge
  # waiting for the db to become accessible. this is more a bug than
  # a feature.
  tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
                        self._populate_vt_select_test)

  tablet_62344.start_vttablet()

  # make sure the query service is started right away
  qr = tablet_62344.execute('select * from vt_select_test')
  self.assertEqual(len(qr['rows']), 4,
                   'expected 4 rows in vt_select_test: %s' % str(qr))

  # make sure direct dba queries work
  query_result = utils.run_vtctl_json(
      ['ExecuteFetchAsDba', '-json', tablet_62344.tablet_alias,
       'select * from vt_test_keyspace.vt_select_test'])
  self.assertEqual(
      len(query_result['rows']), 4,
      'expected 4 rows in vt_select_test: %s' % str(query_result))
  self.assertEqual(
      len(query_result['fields']), 2,
      'expected 2 fields in vt_select_test: %s' % str(query_result))

  # check Ping / RefreshState
  utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
  utils.run_vtctl(['RefreshState', tablet_62344.tablet_alias])

  # Quickly check basic actions.
  utils.run_vtctl(['SetReadOnly', tablet_62344.tablet_alias])
  utils.wait_db_read_only(62344)

  utils.run_vtctl(['SetReadWrite', tablet_62344.tablet_alias])
  utils.check_db_read_write(62344)

  utils.run_vtctl(['DemoteMaster', tablet_62344.tablet_alias])
  utils.wait_db_read_only(62344)

  utils.validate_topology()
  utils.run_vtctl(['ValidateKeyspace', 'test_keyspace'])
  # not pinging tablets, as it enables replication checks, and they
  # break because we only have a single master, no slaves
  utils.run_vtctl(['ValidateShard', '-ping-tablets=false',
                   'test_keyspace/0'])
  self._check_srv_shard()

  tablet_62344.kill_vttablet()
def test_resharding(self):
  """End-to-end horizontal resharding of shard 80- into 80-c0 and c0-.

  Phases: set up source shards, snapshot/restore into the split shards,
  run filtered replication and verify data flow, migrate serving types
  (rdonly, replica, then master), and finally scrap/delete the original
  shard.

  Idiom fix: 'not X in Y' rewritten as 'X not in Y'; no behavior change.
  """
  utils.run_vtctl([
      'CreateKeyspace', '--sharding_column_name', 'bad_column',
      '--sharding_column_type', 'bytes', 'test_keyspace'
  ])
  utils.run_vtctl([
      'SetKeyspaceShardingInfo', 'test_keyspace', 'keyspace_id', 'uint64'
  ], expect_fail=True)
  utils.run_vtctl([
      'SetKeyspaceShardingInfo', '-force', 'test_keyspace', 'keyspace_id',
      keyspace_id_type
  ])

  shard_0_master.init_tablet('master', 'test_keyspace', '-80')
  shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
  shard_0_ny_slave.init_tablet('spare', 'test_keyspace', '-80')
  shard_1_master.init_tablet('master', 'test_keyspace', '80-')
  shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
  shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
  shard_1_ny_slave.init_tablet('spare', 'test_keyspace', '80-')
  shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # we set full_mycnf_args to True as a test in the KIT_BYTES case
  full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

  # create databases so vttablet can start behaving normally
  for t in [
      shard_0_master, shard_0_replica, shard_0_ny_slave, shard_1_master,
      shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly
  ]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args)

  # wait for the tablets
  shard_0_master.wait_for_vttablet_state('SERVING')
  shard_0_replica.wait_for_vttablet_state('SERVING')
  shard_0_ny_slave.wait_for_vttablet_state('NOT_SERVING')  # spare
  shard_1_master.wait_for_vttablet_state('SERVING')
  shard_1_slave1.wait_for_vttablet_state('SERVING')
  shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
  shard_1_ny_slave.wait_for_vttablet_state('NOT_SERVING')  # spare
  shard_1_rdonly.wait_for_vttablet_state('SERVING')

  # reparent to make the tablets work
  utils.run_vtctl([
      'ReparentShard', '-force', 'test_keyspace/-80',
      shard_0_master.tablet_alias
  ], auto_log=True)
  utils.run_vtctl([
      'ReparentShard', '-force', 'test_keyspace/80-',
      shard_1_master.tablet_alias
  ], auto_log=True)

  # create the tables
  self._create_schema()
  self._insert_startup_values()
  self._test_keyrange_constraints()

  # create the split shards
  shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
  shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-c0')
  shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-c0')
  shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
  shard_3_replica.init_tablet('spare', 'test_keyspace', 'c0-')
  shard_3_rdonly.init_tablet('rdonly', 'test_keyspace', 'c0-')

  # start vttablet on the split shards (no db created,
  # so they're all not serving)
  shard_3_master.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica')
  for t in [
      shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_replica,
      shard_3_rdonly
  ]:
    t.start_vttablet(wait_for_state=None)
  for t in [
      shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_master,
      shard_3_replica, shard_3_rdonly
  ]:
    t.wait_for_vttablet_state('NOT_SERVING')

  utils.run_vtctl([
      'ReparentShard', '-force', 'test_keyspace/80-c0',
      shard_2_master.tablet_alias
  ], auto_log=True)
  utils.run_vtctl([
      'ReparentShard', '-force', 'test_keyspace/c0-',
      shard_3_master.tablet_alias
  ], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # take the snapshot for the split
  utils.run_vtctl([
      'MultiSnapshot', '--spec=80-c0-', '--exclude_tables=unrelated',
      shard_1_slave1.tablet_alias
  ], auto_log=True)

  # the snapshot_copy hook will copy the snapshot files to
  # VTDATAROOT/tmp/... as a test. We want to use these for one half,
  # but not for the other, so we test both scenarios.
  os.unlink(
      os.path.join(
          environment.tmproot, "snapshot-from-%s-for-%s.tar" %
          (shard_1_slave1.tablet_alias, "80-c0")))

  # wait for tablet's binlog server service to be enabled after snapshot
  shard_1_slave1.wait_for_binlog_server_state("Enabled")

  # perform the restores: first one from source tablet. We removed the
  # storage backup, so it's coming from the tablet itself.
  # we also delay starting the binlog player, then enable it.
  utils.run_vtctl([
      'ShardMultiRestore',
      '-strategy=populateBlpCheckpoint,dontStartBinlogPlayer',
      'test_keyspace/80-c0', shard_1_slave1.tablet_alias
  ], auto_log=True)

  timeout = 10
  while True:
    shard_2_master_status = shard_2_master.get_status()
    # idiom fix: 'X not in Y' instead of 'not X in Y'
    if ("not starting because flag 'DontStart' is set"
        not in shard_2_master_status):
      timeout = utils.wait_step(
          'shard 2 master has not failed starting yet', timeout)
      continue
    logging.debug("shard 2 master is waiting on flag removal, good")
    break

  qr = utils.run_vtctl_json([
      'ExecuteFetch', shard_2_master.tablet_alias,
      'update _vt.blp_checkpoint set flags="" where source_shard_uid=0'
  ])
  self.assertEqual(qr['RowsAffected'], 1)

  timeout = 10
  while True:
    shard_2_master_status = shard_2_master.get_status()
    if "not starting because flag 'DontStart' is set" in shard_2_master_status:
      timeout = utils.wait_step(
          'shard 2 master has not started replication yet', timeout)
      continue
    logging.debug("shard 2 master has started replication, good")
    break

  # second restore from storage: to be sure, we stop vttablet, and restart
  # it afterwards
  shard_1_slave1.kill_vttablet()
  utils.run_vtctl([
      'ShardMultiRestore', '-strategy=populateBlpCheckpoint',
      'test_keyspace/c0-', shard_1_slave1.tablet_alias
  ], auto_log=True)
  shard_1_slave1.start_vttablet(wait_for_state=None)
  shard_1_slave1.wait_for_binlog_server_state("Enabled")

  # check the startup values are in the right place
  self._check_startup_values()

  # check the schema too
  utils.run_vtctl([
      'ValidateSchemaKeyspace', '--exclude_tables=unrelated', 'test_keyspace'
  ], auto_log=True)

  # check the binlog players are running and exporting vars
  shard_2_master.wait_for_binlog_player_count(1)
  shard_3_master.wait_for_binlog_player_count(1)
  self._check_binlog_player_vars(shard_2_master)
  self._check_binlog_player_vars(shard_3_master)

  # check that binlog server exported the stats vars
  self._check_binlog_server_vars(shard_1_slave1)

  # testing filtered replication: insert a bunch of data on shard 1,
  # check we get most of it after a few seconds, wait for binlog server
  # timeout, check we get all of it.
  logging.debug("Inserting lots of data on source shard")
  self._insert_lots(1000)
  logging.debug("Checking 80 percent of data is sent quickly")
  self._check_lots_timeout(1000, 80, 5)
  logging.debug("Checking all data goes through eventually")
  self._check_lots_timeout(1000, 100, 20)
  logging.debug("Checking no data was sent the wrong way")
  self._check_lots_not_present(1000)
  self._check_binlog_player_vars(shard_2_master,
                                 seconds_behind_master_max=30)
  self._check_binlog_player_vars(shard_3_master,
                                 seconds_behind_master_max=30)

  # use the vtworker checker to compare the data
  logging.debug("Running vtworker SplitDiff")
  utils.run_vtworker(
      ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'], auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)

  utils.pause("Good time to test vtworker for diffs")

  # get status for a destination master tablet, make sure we have it all
  shard_2_master_status = shard_2_master.get_status()
  self.assertIn('Binlog player state: Running', shard_2_master_status)
  self.assertIn(
      '<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>',
      shard_2_master_status)
  self.assertIn('</html>', shard_2_master_status)

  # start a thread to insert data into shard_1 in the background
  # with current time, and monitor the delay
  insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000,
                                 0x9000000000000000)
  insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001,
                                 0xD000000000000000)
  monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
  monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")

  # tests a failover switching serving to a different replica
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
  shard_1_slave2.wait_for_vttablet_state('SERVING')
  shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')

  # test data goes through again
  logging.debug("Inserting lots of data on source shard")
  self._insert_lots(1000, base=1000)
  logging.debug("Checking 80 percent of data was sent quickly")
  self._check_lots_timeout(1000, 80, 5, base=1000)

  # check we can't migrate the master just yet
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                  expect_fail=True)

  # check query service is off on master 2 and master 3, as filtered
  # replication is enabled. Even health check that is enabled on
  # master 3 should not interfere.
  shard_2_master_vars = utils.get_vars(shard_2_master.port)
  self.assertEqual(shard_2_master_vars['TabletStateName'], 'NOT_SERVING')
  shard_3_master_vars = utils.get_vars(shard_3_master.port)
  self.assertEqual(shard_3_master_vars['TabletStateName'], 'NOT_SERVING')

  # now serve rdonly from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-c0 c0-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # then serve replica from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-c0 c0-\n' +
                           'Partitions(replica): -80 80-c0 c0-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # move replica back and forth
  utils.run_vtctl(
      ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
      auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-c0 c0-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-c0 c0-\n' +
                           'Partitions(replica): -80 80-c0 c0-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # reparent shard_2 to shard_2_replica1, then insert more data and
  # see it flow through still
  utils.run_vtctl([
      'ReparentShard', 'test_keyspace/80-c0', shard_2_replica1.tablet_alias
  ])
  logging.debug(
      "Inserting lots of data on source shard after reparenting")
  self._insert_lots(3000, base=2000)
  logging.debug("Checking 80 percent of data was sent fairly quickly")
  self._check_lots_timeout(3000, 80, 10, base=2000)

  # use the vtworker checker to compare the data again
  logging.debug("Running vtworker SplitDiff")
  utils.run_vtworker(
      ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'], auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)

  # going to migrate the master now, check the delays
  monitor_thread_1.done = True
  monitor_thread_2.done = True
  insert_thread_1.done = True
  insert_thread_2.done = True
  logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u",
                monitor_thread_1.object_name, monitor_thread_1.max_lag,
                monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
  logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u",
                monitor_thread_2.object_name, monitor_thread_2.max_lag,
                monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

  # then serve master from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-c0 c0-\n' +
                           'Partitions(rdonly): -80 80-c0 c0-\n' +
                           'Partitions(replica): -80 80-c0 c0-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # check the binlog players are gone now
  shard_2_master.wait_for_binlog_player_count(0)
  shard_3_master.wait_for_binlog_player_count(0)

  # get status for a destination master tablet, make sure it's good
  shard_2_master_status = shard_2_master.get_status()
  self.assertIn('No binlog player is running', shard_2_master_status)
  self.assertIn('</html>', shard_2_master_status)

  # scrap the original tablets in the original shard
  for t in [
      shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave,
      shard_1_rdonly
  ]:
    utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
  tablet.kill_tablets([
      shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave,
      shard_1_rdonly
  ])
  for t in [
      shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave,
      shard_1_rdonly
  ]:
    utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)

  # rebuild the serving graph, all mentions of the old shards shoud be gone
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # test RemoveShardCell
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                  auto_log=True, expect_fail=True)
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                  auto_log=True)
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                  auto_log=True)
  shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
  if shard['Cells']:
    self.fail("Non-empty Cells record for shard: %s" % str(shard))

  # delete the original shard
  utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

  # kill everything
  tablet.kill_tablets([
      shard_0_master, shard_0_replica, shard_0_ny_slave, shard_2_master,
      shard_2_replica1, shard_2_replica2, shard_3_master, shard_3_replica,
      shard_3_rdonly
  ])
def _test_reparent_graceful(self, shard_id):
  """Graceful reparent flow for one shard.

  Sets up a master with three replicas, forces an initial reparent to
  the master, gracefully reparents to a replica, verifies serving-graph
  addresses and MasterCell after each step, then checks that a restarted
  tablet re-registers its (new) address.
  """
  utils.run_vtctl('CreateKeyspace test_keyspace')

  # create the database so vttablets start, as they are serving
  tablet_62344.create_db('vt_test_keyspace')
  tablet_62044.create_db('vt_test_keyspace')
  tablet_41983.create_db('vt_test_keyspace')
  tablet_31981.create_db('vt_test_keyspace')

  # Start up a master mysql and vttablet
  tablet_62344.init_tablet('master', 'test_keyspace', shard_id, start=True)
  if environment.topo_server_implementation == 'zookeeper':
    shard = utils.run_vtctl_json(
        ['GetShard', 'test_keyspace/' + shard_id])
    self.assertEqual(
        shard['Cells'], ['test_nj'],
        'wrong list of cell in Shard: %s' % str(shard['Cells']))

  # Create a few slaves for testing reparenting.
  tablet_62044.init_tablet('replica', 'test_keyspace', shard_id,
                           start=True, wait_for_start=False)
  tablet_41983.init_tablet('replica', 'test_keyspace', shard_id,
                           start=True, wait_for_start=False)
  tablet_31981.init_tablet('replica', 'test_keyspace', shard_id,
                           start=True, wait_for_start=False)
  for t in [tablet_62044, tablet_41983, tablet_31981]:
    t.wait_for_vttablet_state("SERVING")
  if environment.topo_server_implementation == 'zookeeper':
    shard = utils.run_vtctl_json(
        ['GetShard', 'test_keyspace/' + shard_id])
    self.assertEqual(
        shard['Cells'], ['test_nj', 'test_ny'],
        'wrong list of cell in Shard: %s' % str(shard['Cells']))

  # Recompute the shard layout node - until you do that, it might not be
  # valid.
  utils.run_vtctl('RebuildShardGraph test_keyspace/' + shard_id)
  utils.validate_topology()

  # Force the slaves to reparent assuming that all the datasets are
  # identical.
  for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
    t.reset_replication()
  utils.pause("force ReparentShard?")
  utils.run_vtctl('ReparentShard -force test_keyspace/%s %s' %
                  (shard_id, tablet_62344.tablet_alias))
  utils.validate_topology(ping_tablets=True)

  self._check_db_addr(shard_id, 'master', tablet_62344.port)

  # Verify MasterCell is set to new cell.
  srvShard = utils.run_vtctl_json(
      ['GetSrvShard', 'test_nj', 'test_keyspace/%s' % (shard_id)])
  self.assertEqual(srvShard['MasterCell'], 'test_nj')
  srvShard = utils.run_vtctl_json(
      ['GetSrvShard', 'test_ny', 'test_keyspace/%s' % (shard_id)])
  self.assertEqual(srvShard['MasterCell'], 'test_nj')

  # Convert two replica to spare. That should leave only one node serving
  # traffic, but still needs to appear in the replication graph.
  utils.run_vtctl(
      ['ChangeSlaveType', tablet_41983.tablet_alias, 'spare'])
  utils.run_vtctl(
      ['ChangeSlaveType', tablet_31981.tablet_alias, 'spare'])
  utils.validate_topology()
  self._check_db_addr(shard_id, 'replica', tablet_62044.port)

  # Run this to make sure it succeeds.
  utils.run_vtctl('ShardReplicationPositions test_keyspace/%s' % shard_id,
                  stdout=utils.devnull)

  # Perform a graceful reparent operation.
  utils.pause("graceful ReparentShard?")
  utils.run_vtctl('ReparentShard test_keyspace/%s %s' %
                  (shard_id, tablet_62044.tablet_alias), auto_log=True)
  utils.validate_topology()

  self._check_db_addr(shard_id, 'master', tablet_62044.port)

  # Verify MasterCell is set to new cell.
  srvShard = utils.run_vtctl_json(
      ['GetSrvShard', 'test_nj', 'test_keyspace/%s' % (shard_id)])
  self.assertEqual(srvShard['MasterCell'], 'test_nj')
  srvShard = utils.run_vtctl_json(
      ['GetSrvShard', 'test_ny', 'test_keyspace/%s' % (shard_id)])
  self.assertEqual(srvShard['MasterCell'], 'test_nj')

  tablet.kill_tablets(
      [tablet_62344, tablet_62044, tablet_41983, tablet_31981])

  # Test address correction.
  new_port = environment.reserve_ports(1)
  tablet_62044.start_vttablet(port=new_port)

  # Wait a moment for address to reregister.
  time.sleep(1.0)

  self._check_db_addr(shard_id, 'master', new_port)
  tablet_62044.kill_vttablet()
def test_get_keyspace(self):
  """GetKeyspace on the unsharded keyspace reports the expected
  sharding column name and type."""
  keyspace_info = utils.run_vtctl_json(
      ['GetKeyspace', UNSHARDED_KEYSPACE])
  column_name = keyspace_info['ShardingColumnName']
  column_type = keyspace_info['ShardingColumnType']
  self.assertEqual('keyspace_id', column_name)
  self.assertEqual('uint64', column_type)
def test_health_check(self):
  """Health-check driven type transitions: replica <-> spare.

  Starts a master and a health-checked replica, stops/starts replication
  on the replica, and verifies tablet type, healthz, serving graph,
  status page, exported vars and the VtTabletStreamHealth stream track
  the health state. Finally checks lameduck resets the type to spare.

  Bug fixed: the serving-graph poll used a bare 'except:', which also
  swallows KeyboardInterrupt/SystemExit; narrowed to 'except Exception'.
  """
  # one master, one replica that starts in spare
  # (for the replica, we let vttablet do the InitTablet)
  tablet_62344.init_tablet('master', 'test_keyspace', '0')

  for t in tablet_62344, tablet_62044:
    t.create_db('vt_test_keyspace')

  tablet_62344.start_vttablet(wait_for_state=None,
                              target_tablet_type='replica')
  tablet_62044.start_vttablet(wait_for_state=None,
                              target_tablet_type='replica',
                              lameduck_period='5s',
                              init_keyspace='test_keyspace',
                              init_shard='0')

  tablet_62344.wait_for_vttablet_state('SERVING')
  tablet_62044.wait_for_vttablet_state('NOT_SERVING')
  self.check_healthz(tablet_62044, False)

  utils.run_vtctl(
      ['InitShardMaster', 'test_keyspace/0', tablet_62344.tablet_alias])

  # make sure the 'spare' slave goes to 'replica'
  self.wait_for_tablet_type_change(tablet_62044.tablet_alias, 'replica')
  self.check_healthz(tablet_62044, True)

  # make sure the master is still master
  ti = utils.run_vtctl_json(['GetTablet', tablet_62344.tablet_alias])
  self.assertEqual(ti['type'], tablet.Tablet.tablet_type_value['MASTER'],
                   'unexpected master type: %s' % ti['type'])

  # stop replication, make sure we go unhealthy.
  utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
  self.wait_for_tablet_type_change(tablet_62044.tablet_alias, 'spare')
  self.check_healthz(tablet_62044, False)

  # make sure the serving graph was updated
  timeout = 10
  while True:
    try:
      utils.run_vtctl_json(
          ['GetEndPoints', 'test_nj', 'test_keyspace/0', 'replica'])
    except Exception:
      # bug fix: was a bare 'except:'. GetEndPoints failing means the
      # endpoint was removed from the serving graph, which is what we want.
      logging.debug('Tablet is gone from serving graph, good')
      break
    timeout = utils.wait_step(
        'Stopped replication didn\'t trigger removal from serving graph',
        timeout)

  # make sure status web page is unhappy
  self.assertIn(
      '>unhealthy: replication_reporter: '
      'Replication is not running</span></div>', tablet_62044.get_status())

  # make sure the health stream is updated
  health = utils.run_vtctl_json(
      ['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias])
  self.assertIn('replication_reporter: Replication is not running',
                health['realtime_stats']['health_error'])

  # then restart replication, and write data, make sure we go back to healthy
  utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
  self.wait_for_tablet_type_change(tablet_62044.tablet_alias, 'replica')

  # make sure status web page is healthy
  self.assertIn('>healthy</span></div>', tablet_62044.get_status())

  # make sure the vars is updated
  v = utils.get_vars(tablet_62044.port)
  self.assertEqual(v['LastHealthMapCount'], 0)

  # now test VtTabletStreamHealth returns the right thing
  stdout, stderr = utils.run_vtctl(
      ['VtTabletStreamHealth', '-count', '2', tablet_62044.tablet_alias],
      trap_output=True, auto_log=True)
  lines = stdout.splitlines()
  self.assertEqual(len(lines), 2)
  for line in lines:
    logging.debug('Got health: %s', line)
    data = json.loads(line)
    self.assertIn('realtime_stats', data)
    self.assertNotIn('health_error', data['realtime_stats'])
    self.assertNotIn('tablet_externally_reparented_timestamp', data)
    self.assertEqual('test_keyspace', data['target']['keyspace'])
    self.assertEqual('0', data['target']['shard'])
    self.assertEqual(3, data['target']['tablet_type'])

  # kill the tablets
  tablet.kill_tablets([tablet_62344, tablet_62044])

  # the replica was in lameduck for 5 seconds, should have been enough
  # to reset its state to spare
  ti = utils.run_vtctl_json(['GetTablet', tablet_62044.tablet_alias])
  self.assertEqual(
      ti['type'], tablet.Tablet.tablet_type_value['SPARE'],
      "tablet didn't go to spare while in lameduck mode: %s" % str(ti))
def test_resharding(self):
  """End-to-end horizontal resharding: split shard 80- into 80-C0 and C0-.

  Walks the full legacy workflow: keyspace setup, snapshot/restore based
  clone of the split shards, filtered replication catch-up, SplitDiff
  validation, serving-type migration (rdonly, then replica, then master),
  and finally teardown of the original source shard.
  """
  # Create the keyspace with a bad sharding column on purpose; the next
  # call (without -force) must fail, and the -force call fixes it.
  utils.run_vtctl(['CreateKeyspace',
                   '--sharding_column_name', 'bad_column',
                   '--sharding_column_type', 'bytes',
                   'test_keyspace'])
  utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                   'keyspace_id', 'uint64'], expect_fail=True)
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                   'keyspace_id', keyspace_id_type])

  # source side: one shard -80, one shard 80- (the one we will split)
  shard_0_master.init_tablet('master', 'test_keyspace', '-80')
  shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
  shard_1_master.init_tablet('master', 'test_keyspace', '80-')
  shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
  shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
  shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # create databases so vttablet can start behaving normally
  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_slave1,
            shard_1_slave2, shard_1_rdonly]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None)

  # wait for the tablets
  shard_0_master.wait_for_vttablet_state('SERVING')
  shard_0_replica.wait_for_vttablet_state('SERVING')
  shard_1_master.wait_for_vttablet_state('SERVING')
  shard_1_slave1.wait_for_vttablet_state('SERVING')
  shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
  shard_1_rdonly.wait_for_vttablet_state('SERVING')

  # reparent to make the tablets work
  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  # create the tables
  self._create_schema()
  self._insert_startup_values()

  # create the split shards
  shard_2_master.init_tablet('master', 'test_keyspace', '80-C0')
  shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-C0')
  shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-C0')
  shard_3_master.init_tablet('master', 'test_keyspace', 'C0-')
  shard_3_replica.init_tablet('spare', 'test_keyspace', 'C0-')
  shard_3_rdonly.init_tablet('rdonly', 'test_keyspace', 'C0-')

  # start vttablet on the split shards (no db created,
  # so they're all not serving)
  for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
            shard_3_master, shard_3_replica, shard_3_rdonly]:
    t.start_vttablet(wait_for_state=None)
  shard_2_master.wait_for_vttablet_state('CONNECTING')
  shard_2_replica1.wait_for_vttablet_state('NOT_SERVING')
  shard_2_replica2.wait_for_vttablet_state('NOT_SERVING')
  shard_3_master.wait_for_vttablet_state('CONNECTING')
  shard_3_replica.wait_for_vttablet_state('NOT_SERVING')
  shard_3_rdonly.wait_for_vttablet_state('CONNECTING')

  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-C0',
                   shard_2_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/C0-',
                   shard_3_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
  # the serving graph must still only list the original shards
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # take the snapshot for the split
  utils.run_vtctl(['MultiSnapshot', '--spec=80-C0-',
                   shard_1_slave1.tablet_alias], auto_log=True)

  # wait for tablet's binlog server service to be enabled after snapshot,
  # and check all the others while we're at it
  shard_1_slave1.wait_for_binlog_server_state("Enabled")

  # perform the restore.
  utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                   'test_keyspace/80-C0', shard_1_slave1.tablet_alias],
                  auto_log=True)
  utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                   'test_keyspace/C0-', shard_1_slave1.tablet_alias],
                  auto_log=True)

  # check the startup values are in the right place
  self._check_startup_values()

  # check the schema too
  utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

  # check the binlog players are running
  shard_2_master.wait_for_binlog_player_count(1)
  shard_3_master.wait_for_binlog_player_count(1)

  # check that binlog server exported the stats vars
  self._check_binlog_server_vars(shard_1_slave1)

  # testing filtered replication: insert a bunch of data on shard 1,
  # check we get most of it after a few seconds, wait for binlog server
  # timeout, check we get all of it.
  logging.debug("Inserting lots of data on source shard")
  self._insert_lots(1000)
  logging.debug("Checking 80 percent of data is sent quickly")
  self._check_lots_timeout(1000, 80, 5)
  logging.debug("Checking all data goes through eventually")
  self._check_lots_timeout(1000, 100, 20)
  logging.debug("Checking no data was sent the wrong way")
  self._check_lots_not_present(1000)

  # use the vtworker checker to compare the data
  logging.debug("Running vtworker SplitDiff")
  utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
                     auto_log=True)
  # restore the rdonly type on the tablets used by the diff
  utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                  auto_log=True)
  utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
                  auto_log=True)

  utils.pause("Good time to test vtworker for diffs")

  # start a thread to insert data into shard_1 in the background
  # with current time, and monitor the delay
  insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000,
                                 0x9000000000000000)
  insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001,
                                 0xD000000000000000)
  monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
  monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")

  # tests a failover switching serving to a different replica
  utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
  utils.run_vtctl(['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
  shard_1_slave2.wait_for_vttablet_state('SERVING')
  shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')

  # test data goes through again
  logging.debug("Inserting lots of data on source shard")
  self._insert_lots(1000, base=1000)
  logging.debug("Checking 80 percent of data was sent quickly")
  self._check_lots_timeout(1000, 80, 5, base=1000)

  # check we can't migrate the master just yet
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                  expect_fail=True)

  # now serve rdonly from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-C0 C0-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # then serve replica from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-C0 C0-\n' +
                           'Partitions(replica): -80 80-C0 C0-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # move replica back and forth
  utils.run_vtctl(['MigrateServedTypes', '-reverse', 'test_keyspace/80-',
                   'replica'], auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-C0 C0-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-C0 C0-\n' +
                           'Partitions(replica): -80 80-C0 C0-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # reparent shard_2 to shard_2_replica1, then insert more data and
  # see it flow through still
  utils.run_vtctl(['ReparentShard', 'test_keyspace/80-C0',
                   shard_2_replica1.tablet_alias])
  logging.debug("Inserting lots of data on source shard after reparenting")
  self._insert_lots(3000, base=2000)
  logging.debug("Checking 80 percent of data was sent fairly quickly")
  self._check_lots_timeout(3000, 80, 10, base=2000)

  # use the vtworker checker to compare the data again
  logging.debug("Running vtworker SplitDiff")
  utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
                     auto_log=True)
  utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                  auto_log=True)
  utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
                  auto_log=True)

  # going to migrate the master now, check the delays
  monitor_thread_1.done = True
  monitor_thread_2.done = True
  insert_thread_1.done = True
  insert_thread_2.done = True
  logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u",
                monitor_thread_1.object_name,
                monitor_thread_1.max_lag,
                monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
  logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u",
                monitor_thread_2.object_name,
                monitor_thread_2.max_lag,
                monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

  # then serve master from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-C0 C0-\n' +
                           'Partitions(rdonly): -80 80-C0 C0-\n' +
                           'Partitions(replica): -80 80-C0 C0-\n' +
                           'TabletTypes: master,rdonly,replica',
                           keyspace_id_type=keyspace_id_type)

  # check the binlog players are gone now
  shard_2_master.wait_for_binlog_player_count(0)
  shard_3_master.wait_for_binlog_player_count(0)

  # scrap the original tablets in the original shard
  for t in [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_rdonly]:
    utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
  tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
                       shard_1_rdonly])

  # rebuild the serving graph, all mentions of the old shards should be gone
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # test RemoveShardCell (-80 must fail: it still has tablets)
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                  auto_log=True, expect_fail=True)
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                  auto_log=True)
  shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
  if shard['Cells']:
    self.fail("Non-empty Cells record for shard: %s" % str(shard))

  # delete the original shard
  utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

  # kill everything
  tablet.kill_tablets([shard_0_master, shard_0_replica, shard_2_master,
                       shard_2_replica1, shard_2_replica2, shard_3_master,
                       shard_3_replica, shard_3_rdonly])
def _check_master_cell(self, cell, shard_id, master_cell):
  """Assert that the SrvShard record seen from `cell` names `master_cell`.

  Fetches the serving-graph shard record for test_keyspace/<shard_id> in
  the given cell and verifies its 'master_cell' field.
  """
  shard_path = 'test_keyspace/%s' % shard_id
  srv_shard = utils.run_vtctl_json(['GetSrvShard', cell, shard_path])
  self.assertEqual(srv_shard['master_cell'], master_cell)
def _check_srv_shard(self):
  """Assert that shard test_keyspace/0 in test_nj has master_cell test_nj."""
  srv_shard = utils.run_vtctl_json(
      ['GetSrvShard', 'test_nj', 'test_keyspace/0'])
  self.assertEqual(srv_shard['master_cell'], 'test_nj')
def test_no_mysql_healthcheck(self):
  """This test starts a vttablet with no mysql port, while mysql is down.

  It makes sure vttablet will start properly and be unhealthy.
  Then we start mysql, and make sure vttablet becomes healthy.
  """
  # we need replication to be enabled, so the slave tablet can be healthy.
  for t in tablet_62344, tablet_62044:
    t.create_db('vt_test_keyspace')
  pos = mysql_flavor().master_position(tablet_62344)
  # Use 'localhost' as hostname because Travis CI worker hostnames
  # are too long for MySQL replication.
  change_master_cmds = mysql_flavor().change_master_commands(
      'localhost', tablet_62344.mysql_port, pos)
  tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                      change_master_cmds + ['START SLAVE'])

  # now shutdown all mysqld
  shutdown_procs = [
      tablet_62344.shutdown_mysql(),
      tablet_62044.shutdown_mysql(),
  ]
  utils.wait_procs(shutdown_procs)

  # start the tablets, wait for them to be NOT_SERVING (mysqld not there)
  tablet_62344.init_tablet('master', 'test_keyspace', '0')
  tablet_62044.init_tablet('replica', 'test_keyspace', '0',
                           include_mysql_port=False)
  for t in tablet_62344, tablet_62044:
    # Since MySQL is down at this point and we want the tablet to start up
    # successfully, we have to use supports_backups=False.
    t.start_vttablet(wait_for_state=None, supports_backups=False,
                     full_mycnf_args=True, include_mysql_port=False)
  for t in tablet_62344, tablet_62044:
    t.wait_for_vttablet_state('NOT_SERVING')
    self.check_healthz(t, False)

  # Tell slave to not try to repair replication in healthcheck.
  # The StopSlave will ultimately fail because mysqld is not running,
  # But vttablet should remember that it's not supposed to fix replication.
  utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias], expect_fail=True)

  # The above notice to not fix replication should survive tablet restart.
  tablet_62044.kill_vttablet()
  tablet_62044.start_vttablet(wait_for_state='NOT_SERVING',
                              full_mycnf_args=True, include_mysql_port=False,
                              supports_backups=False)

  # restart mysqld
  start_procs = [
      tablet_62344.start_mysql(),
      tablet_62044.start_mysql(),
  ]
  utils.wait_procs(start_procs)

  # the master should still be healthy
  utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias],
                  auto_log=True)
  self.check_healthz(tablet_62344, True)

  # the slave will now be healthy, but report a very high replication
  # lag, because it can't figure out what it exactly is.
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias],
                  auto_log=True)
  tablet_62044.wait_for_vttablet_state('SERVING')
  self.check_healthz(tablet_62044, True)
  health = utils.run_vtctl_json(
      ['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias])
  self.assertTrue('seconds_behind_master' in health['realtime_stats'])
  # 7200 is the sentinel lag reported when the real lag is unknown
  # (presumably a server-side constant -- confirm in vttablet sources).
  self.assertEqual(health['realtime_stats']['seconds_behind_master'], 7200)
  self.assertIn('serving', health)

  # restart replication, wait until health check goes small
  # (a value of zero is default and won't be in structure)
  utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
  timeout = 10
  while True:
    utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias],
                    auto_log=True)
    health = utils.run_vtctl_json(
        ['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias])
    if 'serving' in health and (
        ('seconds_behind_master' not in health['realtime_stats']) or
        (health['realtime_stats']['seconds_behind_master'] < 30)):
      break
    timeout = utils.wait_step('health delay goes back down', timeout)

  # wait for the tablet to fix its mysql port
  for t in tablet_62344, tablet_62044:
    # wait for mysql port to show up
    timeout = 10
    while True:
      ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
      if 'mysql' in ti['port_map']:
        break
      timeout = utils.wait_step('mysql port in tablet record', timeout)
    self.assertEqual(ti['port_map']['mysql'], t.mysql_port)

  # all done
  tablet.kill_tablets([tablet_62344, tablet_62044])
def test_reparent_cross_cell(self, shard_id='0'): utils.run_vtctl(['CreateKeyspace', 'test_keyspace']) # create the database so vttablets start, as they are serving tablet_62344.create_db('vt_test_keyspace') tablet_62044.create_db('vt_test_keyspace') tablet_41983.create_db('vt_test_keyspace') tablet_31981.create_db('vt_test_keyspace') # Start up a master mysql and vttablet tablet_62344.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id]) self.assertEqual( shard['cells'], ['test_nj'], 'wrong list of cell in Shard: %s' % str(shard['cells'])) # Create a few slaves for testing reparenting. Won't be healthy # as replication is not running. tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True, wait_for_start=False) for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]: t.wait_for_vttablet_state('NOT_SERVING') shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id]) self.assertEqual( shard['cells'], ['test_nj', 'test_ny'], 'wrong list of cell in Shard: %s' % str(shard['cells'])) # Force the slaves to reparent assuming that all the datasets are # identical. utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/' + shard_id, tablet_62344.tablet_alias ], auto_log=True) utils.validate_topology(ping_tablets=True) self._check_master_tablet(tablet_62344) # Perform a graceful reparent operation to another cell. utils.run_vtctl([ 'PlannedReparentShard', '-keyspace_shard', 'test_keyspace/' + shard_id, '-new_master', tablet_31981.tablet_alias ], auto_log=True) utils.validate_topology() self._check_master_tablet(tablet_31981) tablet.kill_tablets( [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
def test_vertical_split(self):
  """Vertical split: move the 'moving' tables and view1 to a new keyspace.

  Clones the tables with VerticalSplitClone, verifies filtered replication
  and VerticalSplitDiff, exercises SetKeyspaceServedFrom manual overrides,
  then migrates served types (rdonly, replica, master) to the destination
  keyspace and checks blacklists/redirection at each step.
  """
  # copy the schema, then clone the data with the worker
  utils.run_vtctl(['CopySchemaShard', '--tables', '/moving/,view1',
                   source_rdonly1.tablet_alias,
                   'destination_keyspace/0'], auto_log=True)
  utils.run_vtworker(['--cell', 'test_nj',
                      '--command_display_interval', '10ms',
                      'VerticalSplitClone',
                      '--tables', '/moving/,view1',
                      '--chunk_count', '10',
                      '--min_rows_per_chunk', '1',
                      '--min_healthy_rdonly_tablets', '1',
                      'destination_keyspace/0'],
                     auto_log=True)

  # check values are present
  self._check_values(destination_master, 'vt_destination_keyspace',
                     'moving1', self.moving1_first, 100)
  self._check_values(destination_master, 'vt_destination_keyspace',
                     'moving2', self.moving2_first, 100)
  self._check_values(destination_master, 'vt_destination_keyspace',
                     'view1', self.moving1_first, 100)

  # check the binlog player is running and exporting vars
  self.check_destination_master(destination_master, ['source_keyspace/0'])

  # check that binlog server exported the stats vars
  self.check_binlog_server_vars(source_replica, horizontal=False)

  # add values to source, make sure they're replicated
  moving1_first_add1 = self._insert_values('moving1', 100)
  # staying1 rows must NOT be replicated; insert them only to create noise
  _ = self._insert_values('staying1', 100)
  moving2_first_add1 = self._insert_values('moving2', 100)
  self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                             'moving1', moving1_first_add1, 100)
  self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                             'moving2', moving2_first_add1, 100)
  self.check_binlog_player_vars(destination_master, ['source_keyspace/0'],
                                seconds_behind_master_max=30)
  self.check_binlog_server_vars(source_replica, horizontal=False,
                                min_statements=100, min_transactions=100)

  # use vtworker to compare the data
  logging.debug('Running vtworker VerticalSplitDiff')
  utils.run_vtworker(['-cell', 'test_nj', 'VerticalSplitDiff',
                      '--min_healthy_rdonly_tablets', '1',
                      'destination_keyspace/0'], auto_log=True)

  utils.pause('Good time to test vtworker for diffs')

  # get status for destination master tablet, make sure we have it all
  self.check_running_binlog_player(destination_master, 700, 300,
                                   extra_text='moving')

  # check query service is off on destination master, as filtered
  # replication is enabled. Even health check should not interfere.
  destination_master_vars = utils.get_vars(destination_master.port)
  self.assertEqual(destination_master_vars['TabletStateName'],
                   'NOT_SERVING')

  # check we can't migrate the master just yet
  utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'],
                  expect_fail=True)

  # migrate rdonly only in test_ny cell, make sure nothing is migrated
  # in test_nj
  utils.run_vtctl(['MigrateServedFrom', '--cells=test_ny',
                   'destination_keyspace/0', 'rdonly'],
                  auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                           'ServedFrom(rdonly): source_keyspace\n'
                           'ServedFrom(replica): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, None)
  self._check_blacklisted_tables(source_rdonly1, None)
  self._check_blacklisted_tables(source_rdonly2, None)

  # migrate test_nj only, using command line manual fix command,
  # and restore it back.
  keyspace_json = utils.run_vtctl_json(
      ['GetKeyspace', 'destination_keyspace'])
  found = False
  for ksf in keyspace_json['served_froms']:
    if ksf['tablet_type'] == topodata_pb2.RDONLY:
      found = True
      # only test_nj should still be served from the source keyspace
      self.assertEqual(ksf['cells'], ['test_nj'])
  self.assertTrue(found)
  utils.run_vtctl(['SetKeyspaceServedFrom', '-source=source_keyspace',
                   '-remove', '-cells=test_nj', 'destination_keyspace',
                   'rdonly'], auto_log=True)
  keyspace_json = utils.run_vtctl_json(
      ['GetKeyspace', 'destination_keyspace'])
  found = False
  for ksf in keyspace_json['served_froms']:
    if ksf['tablet_type'] == topodata_pb2.RDONLY:
      found = True
  # the rdonly served-from entry must be completely gone now
  self.assertFalse(found)
  utils.run_vtctl(['SetKeyspaceServedFrom', '-source=source_keyspace',
                   'destination_keyspace', 'rdonly'],
                  auto_log=True)
  keyspace_json = utils.run_vtctl_json(
      ['GetKeyspace', 'destination_keyspace'])
  found = False
  for ksf in keyspace_json['served_froms']:
    if ksf['tablet_type'] == topodata_pb2.RDONLY:
      found = True
      # restored without a cell restriction
      self.assertNotIn('cells', ksf)
  self.assertTrue(found)

  # now serve rdonly from the destination shards
  utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'rdonly'],
                  auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                           'ServedFrom(replica): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, None)
  self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
  self._check_client_conn_redirection(
      'destination_keyspace', ['master', 'replica'], ['moving1', 'moving2'])

  # then serve replica from the destination shards
  utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
                  auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
  self._check_client_conn_redirection(
      'destination_keyspace', ['master'], ['moving1', 'moving2'])

  # move replica back and forth
  utils.run_vtctl(['MigrateServedFrom', '-reverse',
                   'destination_keyspace/0', 'replica'], auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                           'ServedFrom(replica): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, None)
  self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
  utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
                  auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])
  self._check_client_conn_redirection(
      'destination_keyspace', ['master'], ['moving1', 'moving2'])

  # then serve master from the destination shards
  utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'],
                  auto_log=True)
  self._check_srv_keyspace('')
  self._check_blacklisted_tables(source_master, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_replica, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly1, ['/moving/', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['/moving/', 'view1'])

  # check the binlog player is gone now
  self.check_no_binlog_player(destination_master)

  # check the stats are correct
  self._check_stats()

  # now remove the tables on the source shard. The blacklisted tables
  # in the source shard won't match any table, make sure that works.
  utils.run_vtctl(['ApplySchema', '-sql=drop view view1',
                   'source_keyspace'], auto_log=True)
  for t in ['moving1', 'moving2']:
    utils.run_vtctl(['ApplySchema', '-sql=drop table %s' % (t),
                     'source_keyspace'], auto_log=True)
  for t in [source_master, source_replica, source_rdonly1, source_rdonly2]:
    utils.run_vtctl(['ReloadSchema', t.tablet_alias])
  qr = source_master.execute('select count(1) from staying1')
  self.assertEqual(len(qr['rows']), 1,
                   'cannot read staying1: got %s' % str(qr))

  # test SetShardTabletControl
  self._verify_vtctl_set_shard_tablet_control()
def test_health_check(self):
  """Health-check lifecycle of a replica tablet.

  Starts a master and an uninitialized replica, verifies the replica goes
  from unhealthy to healthy once the shard has a master, checks that the
  replication reporter repairs a stopped slave, and validates the
  VtTabletStreamHealth output (including a QPS > 0 sample).
  """
  # one master, one replica that starts not initialized
  # (for the replica, we let vttablet do the InitTablet)
  tablet_62344.init_tablet('master', 'test_keyspace', '0')

  for t in tablet_62344, tablet_62044:
    t.create_db('vt_test_keyspace')

  tablet_62344.start_vttablet(wait_for_state=None)
  tablet_62044.start_vttablet(wait_for_state=None,
                              lameduck_period='5s',
                              init_tablet_type='replica',
                              init_keyspace='test_keyspace',
                              init_shard='0')

  tablet_62344.wait_for_vttablet_state('SERVING')
  tablet_62044.wait_for_vttablet_state('NOT_SERVING')
  self.check_healthz(tablet_62044, False)

  utils.run_vtctl(['InitShardMaster', 'test_keyspace/0',
                   tablet_62344.tablet_alias])

  # make sure the unhealthy slave goes to healthy
  tablet_62044.wait_for_vttablet_state('SERVING')
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  self.check_healthz(tablet_62044, True)

  # make sure the master is still master
  ti = utils.run_vtctl_json(['GetTablet', tablet_62344.tablet_alias])
  self.assertEqual(ti['type'], topodata_pb2.MASTER,
                   'unexpected master type: %s' % ti['type'])

  # stop replication at the mysql level.
  tablet_62044.mquery('', 'stop slave')
  # vttablet replication_reporter should restart it.
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  # insert something on the master and wait for it on the slave.
  tablet_62344.mquery('vt_test_keyspace', [
      'create table repl_test_table (id int)',
      'insert into repl_test_table values (123)'], write=True)
  timeout = 10.0
  while True:
    try:
      result = tablet_62044.mquery('vt_test_keyspace',
                                   'select * from repl_test_table')
      if result:
        self.assertEqual(result[0][0], 123L)
        break
    except MySQLdb.ProgrammingError:
      # Maybe the create table hasn't gone through yet, we wait more
      logging.exception('got this exception waiting for data, ignoring it')
    timeout = utils.wait_step(
        'slave replication repaired by replication_reporter', timeout)

  # stop replication, make sure we don't go unhealthy.
  # (we have a baseline as well, so the time should be good).
  utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
  self.check_healthz(tablet_62044, True)

  # make sure status web page is healthy
  self.assertIn('>healthy</span></div>', tablet_62044.get_status())

  # make sure the health stream is updated
  health = utils.run_vtctl_json(
      ['VtTabletStreamHealth', '-count', '1', tablet_62044.tablet_alias])
  self.assertTrue(
      ('seconds_behind_master' not in health['realtime_stats']) or
      (health['realtime_stats']['seconds_behind_master'] < 30),
      'got unexpected health: %s' % str(health))
  self.assertIn('serving', health)

  # then restart replication, make sure we stay healthy
  utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
  utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])

  # make sure status web page is healthy
  self.assertIn('>healthy</span></div>', tablet_62044.get_status())

  # now test VtTabletStreamHealth returns the right thing
  stdout, _ = utils.run_vtctl(['VtTabletStreamHealth',
                               '-count', '2',
                               tablet_62044.tablet_alias],
                              trap_output=True, auto_log=True)
  lines = stdout.splitlines()
  self.assertEqual(len(lines), 2)
  for line in lines:
    logging.debug('Got health: %s', line)
    data = json.loads(line)
    self.assertIn('realtime_stats', data)
    self.assertIn('serving', data)
    self.assertTrue(data['serving'])
    self.assertNotIn('health_error', data['realtime_stats'])
    self.assertNotIn('tablet_externally_reparented_timestamp', data)
    self.assertEqual('test_keyspace', data['target']['keyspace'])
    self.assertEqual('0', data['target']['shard'])
    self.assertEqual(topodata_pb2.REPLICA, data['target']['tablet_type'])

  # Test that VtTabletStreamHealth reports a QPS >0.0.
  # Therefore, issue several reads first.
  # NOTE: This may be potentially flaky because we'll observe a QPS >0.0
  #       exactly "once" for the duration of one sampling interval (5s) and
  #       after that we'll see 0.0 QPS rates again. If this becomes actually
  #       flaky, we need to read continuously in a separate thread.
  for _ in range(10):
    tablet_62044.execute('select 1 from dual')

  # This may take up to 5 seconds to become true because we sample the query
  # counts for the rates only every 5 seconds (see query_service_stats.go).
  timeout = 10
  while True:
    health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1',
                                   tablet_62044.tablet_alias])
    if health['realtime_stats'].get('qps', 0.0) > 0.0:
      break
    timeout = utils.wait_step('QPS >0.0 seen', timeout)

  # kill the tablets
  tablet.kill_tablets([tablet_62344, tablet_62044])
def test_vertical_split(self):
    """End-to-end vertical split: migrates the 'moving' tables (and view1)
    from source_keyspace to destination_keyspace.

    Covers: initial copy (worker or snapshot/restore), filtered replication
    catch-up, VerticalSplitDiff verification, MigrateServedFrom for
    rdonly/replica/master (including a reverse migration), blacklisted-table
    bookkeeping, and SetShardBlacklistedTables behavior.
    """
    utils.run_vtctl(['CreateKeyspace', 'source_keyspace'])
    # destination serves everything from the source until migration happens
    utils.run_vtctl(['CreateKeyspace', '--served-from',
                     'master:source_keyspace,replica:source_keyspace,rdonly:source_keyspace',
                     'destination_keyspace'])
    source_master.init_tablet('master', 'source_keyspace', '0')
    source_replica.init_tablet('replica', 'source_keyspace', '0')
    source_rdonly.init_tablet('rdonly', 'source_keyspace', '0')

    # rebuild destination keyspace to make sure there is a serving
    # graph entry, even though there is no tablet yet.
    utils.run_vtctl(['RebuildKeyspaceGraph', 'source_keyspace'], auto_log=True)
    utils.run_vtctl(['RebuildKeyspaceGraph', 'destination_keyspace'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n' +
                             'ServedFrom(rdonly): source_keyspace\n' +
                             'ServedFrom(replica): source_keyspace\n')

    destination_master.init_tablet('master', 'destination_keyspace', '0')
    destination_replica.init_tablet('replica', 'destination_keyspace', '0')
    destination_rdonly.init_tablet('rdonly', 'destination_keyspace', '0')

    # rebuild again now that destination tablets exist; ServedFrom must hold
    utils.run_vtctl(['RebuildKeyspaceGraph', 'source_keyspace'], auto_log=True)
    utils.run_vtctl(['RebuildKeyspaceGraph', 'destination_keyspace'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n' +
                             'ServedFrom(rdonly): source_keyspace\n' +
                             'ServedFrom(replica): source_keyspace\n')

    # create databases so vttablet can start behaving normally
    for t in [source_master, source_replica, source_rdonly]:
      t.create_db('vt_source_keyspace')
      t.start_vttablet(wait_for_state=None)
    destination_master.start_vttablet(wait_for_state=None,
                                      target_tablet_type='replica')
    for t in [destination_replica, destination_rdonly]:
      t.start_vttablet(wait_for_state=None)

    # wait for the tablets (destination side has no data yet, so NOT_SERVING)
    for t in [source_master, source_replica, source_rdonly]:
      t.wait_for_vttablet_state('SERVING')
    for t in [destination_master, destination_replica, destination_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['ReparentShard', '-force', 'source_keyspace/0',
                     source_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['ReparentShard', '-force', 'destination_keyspace/0',
                     destination_master.tablet_alias], auto_log=True)

    # read all the keyspaces, this will populate the topology cache.
    self._populate_topo_cache()

    # create the schema on the source keyspace, add some values
    self._create_source_schema()
    moving1_first = self._insert_values('moving1', 100)
    moving2_first = self._insert_values('moving2', 100)
    staying1_first = self._insert_values('staying1', 100)
    staying2_first = self._insert_values('staying2', 100)
    self._check_values(source_master, 'vt_source_keyspace', 'moving1',
                       moving1_first, 100)
    self._check_values(source_master, 'vt_source_keyspace', 'moving2',
                       moving2_first, 100)
    self._check_values(source_master, 'vt_source_keyspace', 'staying1',
                       staying1_first, 100)
    self._check_values(source_master, 'vt_source_keyspace', 'staying2',
                       staying2_first, 100)
    # view1 mirrors moving1's data
    self._check_values(source_master, 'vt_source_keyspace', 'view1',
                       moving1_first, 100)

    # NOTE(review): use_clone_worker appears to be a module-level flag
    # selecting the copy mechanism — confirm where it is set.
    if use_clone_worker:
      # the worker will do everything. We test with source_reader_count=10
      # (down from default=20) as connection pool is not big enough for 20.
      # min_table_size_for_split is set to 1 as to force a split even on the
      # small table we have.
      utils.run_vtworker(['--cell', 'test_nj',
                          '--command_display_interval', '10ms',
                          'VerticalSplitClone',
                          '--tables', 'moving.*,view1',
                          '--strategy', 'populateBlpCheckpoint',
                          '--source_reader_count', '10',
                          '--min_table_size_for_split', '1',
                          'destination_keyspace/0'],
                         auto_log=True)
    else:
      # take the snapshot for the split
      utils.run_vtctl(['MultiSnapshot', '--tables', 'moving.*,view1',
                       source_rdonly.tablet_alias], auto_log=True)
      # perform the restore.
      utils.run_vtctl(['ShardMultiRestore',
                       '--strategy', 'populateBlpCheckpoint',
                       '--tables', 'moving.*,view1',
                       'destination_keyspace/0', source_rdonly.tablet_alias],
                      auto_log=True)

    topology.refresh_keyspace(self.vtgate_client, 'destination_keyspace')

    # check values are present on the destination side
    self._check_values(destination_master, 'vt_destination_keyspace',
                       'moving1', moving1_first, 100)
    self._check_values(destination_master, 'vt_destination_keyspace',
                       'moving2', moving2_first, 100)
    self._check_values(destination_master, 'vt_destination_keyspace',
                       'view1', moving1_first, 100)

    # check the binlog player is running
    destination_master.wait_for_binlog_player_count(1)

    # add values to source, make sure they're replicated
    moving1_first_add1 = self._insert_values('moving1', 100)
    staying1_first_add1 = self._insert_values('staying1', 100)
    moving2_first_add1 = self._insert_values('moving2', 100)
    self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                               'moving1', moving1_first_add1, 100)
    self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                               'moving2', moving2_first_add1, 100)

    # use the vtworker checker to compare the data
    logging.debug("Running vtworker VerticalSplitDiff")
    utils.run_vtworker(['-cell', 'test_nj', 'VerticalSplitDiff',
                        'destination_keyspace/0'], auto_log=True)
    # the diff demoted the rdonly tablets; restore their type
    utils.run_vtctl(['ChangeSlaveType', source_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', destination_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)

    utils.pause("Good time to test vtworker for diffs")

    # get status for destination master tablet, make sure we have it all
    destination_master_status = destination_master.get_status()
    self.assertIn('Binlog player state: Running', destination_master_status)
    self.assertIn('moving.*', destination_master_status)
    self.assertIn('<td><b>All</b>: 1000<br><b>Query</b>: 700<br><b>Transaction</b>: 300<br></td>', destination_master_status)
    self.assertIn('</html>', destination_master_status)

    # check query service is off on destination master, as filtered
    # replication is enabled. Even health check should not interfere.
    destination_master_vars = utils.get_vars(destination_master.port)
    self.assertEqual(destination_master_vars['TabletStateName'], 'NOT_SERVING')

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'],
                    expect_fail=True)

    # now serve rdonly from the destination shards
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'rdonly'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n' +
                             'ServedFrom(replica): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, None)
    self._check_blacklisted_tables(source_rdonly, ['moving.*', 'view1'])
    self._check_client_conn_redirection(
        'source_keyspace', 'destination_keyspace', ['rdonly'],
        ['master', 'replica'], ['moving1', 'moving2'])

    # then serve replica from the destination shards
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, ['moving.*', 'view1'])
    self._check_blacklisted_tables(source_rdonly, ['moving.*', 'view1'])
    self._check_client_conn_redirection('source_keyspace',
                                        'destination_keyspace',
                                        ['replica', 'rdonly'], ['master'],
                                        ['moving1', 'moving2'])

    # move replica back and forth
    utils.run_vtctl(['MigrateServedFrom', '-reverse',
                     'destination_keyspace/0', 'replica'], auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n' +
                             'ServedFrom(replica): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, None)
    self._check_blacklisted_tables(source_rdonly, ['moving.*', 'view1'])
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
                    auto_log=True)
    self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
    self._check_blacklisted_tables(source_master, None)
    self._check_blacklisted_tables(source_replica, ['moving.*', 'view1'])
    self._check_blacklisted_tables(source_rdonly, ['moving.*', 'view1'])
    self._check_client_conn_redirection(
        'source_keyspace', 'destination_keyspace', ['replica', 'rdonly'],
        ['master'], ['moving1', 'moving2'])

    # then serve master from the destination shards
    utils.run_vtctl(['MigrateServedFrom', 'destination_keyspace/0', 'master'],
                    auto_log=True)
    # nothing is served from the source anymore
    self._check_srv_keyspace('')
    self._check_blacklisted_tables(source_master, ['moving.*', 'view1'])
    self._check_blacklisted_tables(source_replica, ['moving.*', 'view1'])
    self._check_blacklisted_tables(source_rdonly, ['moving.*', 'view1'])
    self._check_client_conn_redirection(
        'source_keyspace', 'destination_keyspace',
        ['replica', 'rdonly', 'master'], [], ['moving1', 'moving2'])

    # check 'vtctl SetShardBlacklistedTables' command works as expected:
    # clear the rdonly entry, re-add it, and then clear all entries.
    utils.run_vtctl(['SetShardBlacklistedTables', 'source_keyspace/0',
                     'rdonly'], auto_log=True)
    shard_json = utils.run_vtctl_json(['GetShard', 'source_keyspace/0'])
    self.assertNotIn('rdonly', shard_json['BlacklistedTablesMap'])
    self.assertIn('replica', shard_json['BlacklistedTablesMap'])
    self.assertIn('master', shard_json['BlacklistedTablesMap'])
    utils.run_vtctl(['SetShardBlacklistedTables', 'source_keyspace/0',
                     'rdonly', 'moving.*,view1'], auto_log=True)
    shard_json = utils.run_vtctl_json(['GetShard', 'source_keyspace/0'])
    self.assertEqual(['moving.*', 'view1'],
                     shard_json['BlacklistedTablesMap']['rdonly'])
    utils.run_vtctl(['SetShardBlacklistedTables', 'source_keyspace/0',
                     'rdonly'], auto_log=True)
    utils.run_vtctl(['SetShardBlacklistedTables', 'source_keyspace/0',
                     'replica'], auto_log=True)
    utils.run_vtctl(['SetShardBlacklistedTables', 'source_keyspace/0',
                     'master'], auto_log=True)
    shard_json = utils.run_vtctl_json(['GetShard', 'source_keyspace/0'])
    self.assertEqual(None, shard_json['BlacklistedTablesMap'])

    # check the binlog player is gone now
    destination_master.wait_for_binlog_player_count(0)

    # optional method to check the stats are correct
    self._check_stats()

    # kill everything
    tablet.kill_tablets([source_master, source_replica, source_rdonly,
                         destination_master, destination_replica,
                         destination_rdonly])
def _test_reparent_graceful(self, shard_id):
    """Covers a full planned (graceful) reparent cycle on one shard.

    Brings up a master and three replicas, elects the initial master via
    InitShardMaster, demotes two replicas to spare, runs
    PlannedReparentShard onto tablet_62044, verifies data flows through the
    new master, and finally checks that a restarted tablet re-registers a
    new port in the serving graph.

    Args:
      shard_id: shard name within test_keyspace (e.g. '0').
    """
    # The vttablets are serving, so their databases must exist before start.
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62044.create_db('vt_test_keyspace')
    tablet_41983.create_db('vt_test_keyspace')
    tablet_31981.create_db('vt_test_keyspace')

    # Start up a master mysql and vttablet
    tablet_62344.init_tablet('master', 'test_keyspace', shard_id, start=True)
    if environment.topo_server().flavor() == 'zookeeper':
      shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
      self.assertEqual(shard['cells'], ['test_nj'],
                       'wrong list of cell in Shard: %s' % str(shard['cells']))

    # Create a few slaves for testing reparenting.
    tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    for t in [tablet_62044, tablet_41983, tablet_31981]:
      t.wait_for_vttablet_state('SERVING')
    if environment.topo_server().flavor() == 'zookeeper':
      # the test_ny tablet must have extended the shard's cell list
      shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
      self.assertEqual(shard['cells'], ['test_nj', 'test_ny'],
                       'wrong list of cell in Shard: %s' % str(shard['cells']))

    # Recompute the shard layout node - until you do that, it might not be
    # valid.
    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/' + shard_id])
    utils.validate_topology()

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
      t.reset_replication()
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/' + shard_id,
                     tablet_62344.tablet_alias])
    utils.validate_topology(ping_tablets=True)

    tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)
    self._check_db_addr(shard_id, 'master', tablet_62344.port)

    # Verify MasterCell is set to new cell.
    self._check_master_cell('test_nj', shard_id, 'test_nj')
    self._check_master_cell('test_ny', shard_id, 'test_nj')

    # Convert two replica to spare. That should leave only one node
    # serving traffic, but still needs to appear in the replication
    # graph.
    utils.run_vtctl(['ChangeSlaveType', tablet_41983.tablet_alias, 'spare'])
    utils.run_vtctl(['ChangeSlaveType', tablet_31981.tablet_alias, 'spare'])
    utils.validate_topology()
    self._check_db_addr(shard_id, 'replica', tablet_62044.port)

    # Run this to make sure it succeeds.
    utils.run_vtctl(['ShardReplicationPositions', 'test_keyspace/' + shard_id],
                    stdout=utils.devnull)

    # Perform a graceful reparent operation.
    utils.pause('_test_reparent_graceful PlannedReparentShard')
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/' + shard_id,
                     tablet_62044.tablet_alias], auto_log=True)
    utils.validate_topology()
    self._check_db_addr(shard_id, 'master', tablet_62044.port)

    # insert data into the new master, check the connected slaves work
    self._populate_vt_insert_test(tablet_62044, 1)
    self._check_vt_insert_test(tablet_41983, 1)
    self._check_vt_insert_test(tablet_62344, 1)

    # Verify MasterCell is set to new cell.
    self._check_master_cell('test_nj', shard_id, 'test_nj')
    self._check_master_cell('test_ny', shard_id, 'test_nj')

    tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983,
                         tablet_31981])

    # Test address correction.
    new_port = environment.reserve_ports(1)
    tablet_62044.start_vttablet(port=new_port)

    # Wait until the new address registers.
    timeout = 30.0
    while True:
      try:
        self._check_db_addr(shard_id, 'master', new_port)
        break
      except Exception:
        # BUGFIX: was a bare 'except:', which also swallows SystemExit and
        # KeyboardInterrupt and would make the loop impossible to interrupt.
        timeout = utils.wait_step('waiting for new port to register', timeout,
                                  sleep_time=0.1)

    tablet_62044.kill_vttablet()
def test_reparent_cross_cell(self, shard_id='0'):
    """Reparents the shard onto a master in a different cell (test_ny) and
    verifies the shard record, serving addresses, and MasterCell follow.
    """
    shard_path = 'test_keyspace/' + shard_id
    all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]

    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # The vttablets come up serving, so their databases must exist first.
    for tab in all_tablets:
      tab.create_db('vt_test_keyspace')

    # Bring up the initial master; startup is awaited in bulk below.
    tablet_62344.init_tablet('master', 'test_keyspace', shard_id,
                             start=True, wait_for_start=False)
    shard_rec = utils.run_vtctl_json(['GetShard', shard_path])
    self.assertEqual(shard_rec['cells'], ['test_nj'],
                     'wrong list of cell in Shard: %s' %
                     str(shard_rec['cells']))

    # Bring up three replicas to reparent between.
    for tab in (tablet_62044, tablet_41983, tablet_31981):
      tab.init_tablet('replica', 'test_keyspace', shard_id,
                      start=True, wait_for_start=False)
    for tab in all_tablets:
      tab.wait_for_vttablet_state('SERVING')

    # Registering the test_ny tablet must have extended the cell list.
    shard_rec = utils.run_vtctl_json(['GetShard', shard_path])
    self.assertEqual(shard_rec['cells'], ['test_nj', 'test_ny'],
                     'wrong list of cell in Shard: %s' %
                     str(shard_rec['cells']))

    # The shard layout node may be stale until explicitly rebuilt.
    utils.run_vtctl(['RebuildShardGraph', shard_path])
    utils.validate_topology()

    # All datasets are identical, so force-elect the initial master.
    for tab in all_tablets:
      tab.reset_replication()
    utils.run_vtctl(['InitShardMaster', shard_path, tablet_62344.tablet_alias],
                    auto_log=True)
    utils.validate_topology(ping_tablets=True)

    self._check_db_addr(shard_id, 'master', tablet_62344.port)

    # MasterCell should read test_nj from both cells.
    self._check_master_cell('test_nj', shard_id, 'test_nj')
    self._check_master_cell('test_ny', shard_id, 'test_nj')

    # Gracefully reparent onto the tablet living in test_ny.
    utils.pause('test_reparent_cross_cell PlannedReparentShard')
    utils.run_vtctl(['PlannedReparentShard', shard_path,
                     tablet_31981.tablet_alias], auto_log=True)
    utils.validate_topology()

    self._check_db_addr(shard_id, 'master', tablet_31981.port, cell='test_ny')

    # MasterCell should now read test_ny from both cells.
    self._check_master_cell('test_nj', shard_id, 'test_ny')
    self._check_master_cell('test_ny', shard_id, 'test_ny')

    tablet.kill_tablets(all_tablets)
def _test_reparent_graceful(self, shard_id):
    """Covers a planned (graceful) reparent cycle using the flagged
    PlannedReparentShard syntax (-keyspace_shard / -new_master), then checks
    that a restarted tablet re-registers its new port.

    NOTE(review): a method with this same name also appears earlier in the
    file; the later definition wins at class-creation time — confirm which
    one is intended to be kept.

    Args:
      shard_id: shard name within test_keyspace (e.g. '0').
    """
    # create the database so vttablets start, as they are serving
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62044.create_db('vt_test_keyspace')
    tablet_41983.create_db('vt_test_keyspace')
    tablet_31981.create_db('vt_test_keyspace')

    # Start up a master mysql and vttablet
    # (initialized as 'replica'; InitShardMaster promotes it below)
    tablet_62344.init_tablet('replica', 'test_keyspace', shard_id, start=True)
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
    self.assertEqual(
        shard['cells'], ['test_nj'],
        'wrong list of cell in Shard: %s' % str(shard['cells']))

    # Create a few slaves for testing reparenting.
    tablet_62044.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    tablet_41983.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    tablet_31981.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                             wait_for_start=False)
    for t in [tablet_62044, tablet_41983, tablet_31981]:
      t.wait_for_vttablet_state('NOT_SERVING')
    # the test_ny tablet must have extended the shard's cell list
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/' + shard_id])
    self.assertEqual(
        shard['cells'], ['test_nj', 'test_ny'],
        'wrong list of cell in Shard: %s' % str(shard['cells']))

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    utils.run_vtctl([
        'InitShardMaster', '-force', 'test_keyspace/' + shard_id,
        tablet_62344.tablet_alias
    ])
    utils.validate_topology(ping_tablets=True)

    tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)
    self._check_master_tablet(tablet_62344)

    utils.validate_topology()

    # Run this to make sure it succeeds.
    stdout, _ = utils.run_vtctl(
        ['ShardReplicationPositions', 'test_keyspace/' + shard_id],
        trap_output=True)
    lines = stdout.splitlines()
    self.assertEqual(len(lines), 4)  # one master, three slaves
    self.assertIn('master', lines[0])  # master first

    # Perform a graceful reparent operation.
    utils.run_vtctl([
        'PlannedReparentShard', '-keyspace_shard',
        'test_keyspace/' + shard_id, '-new_master',
        tablet_62044.tablet_alias
    ], auto_log=True)
    utils.validate_topology()

    self._check_master_tablet(tablet_62044)

    # insert data into the new master, check the connected slaves work
    self._populate_vt_insert_test(tablet_62044, 1)
    self._check_vt_insert_test(tablet_41983, 1)
    self._check_vt_insert_test(tablet_62344, 1)

    tablet.kill_tablets(
        [tablet_62344, tablet_62044, tablet_41983, tablet_31981])

    # Test address correction.
    new_port = environment.reserve_ports(1)
    tablet_62044.start_vttablet(port=new_port)

    # Wait until the new address registers.
    timeout = 30.0
    while True:
      try:
        self._check_master_tablet(tablet_62044, port=new_port)
        break
      except protocols_flavor().client_error_exception_type():
        timeout = utils.wait_step('waiting for new port to register',
                                  timeout, sleep_time=0.1)

    tablet_62044.kill_vttablet()
def test_get_keyspace(self):
    """Checks GetKeyspace reports the expected sharding column info."""
    keyspace_info = utils.run_vtctl_json(['GetKeyspace', UNSHARDED_KEYSPACE])
    self.assertEqual(keyspace_info['sharding_column_name'], 'keyspace_id')
    self.assertEqual(keyspace_info['sharding_column_type'], 1)
def test_health_check(self):
    """Exercises the tablet health-check pipeline.

    Starts one master and one spare-that-becomes-replica, stops replication
    on the slave to drive replication_lag to 'high', verifies the tablet
    record / serving graph / status page / vars all reflect it, restarts
    replication, verifies recovery, and checks the lameduck period resets
    the slave to 'spare' on shutdown.
    """
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # one master, one replica that starts in spare
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('spare', 'test_keyspace', '0')

    for t in tablet_62344, tablet_62044:
      t.create_db('vt_test_keyspace')

    tablet_62344.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica')
    # lameduck_period of 5s is relied on by the final 'spare' check below
    tablet_62044.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica',
                                lameduck_period='5s')
    tablet_62344.wait_for_vttablet_state('SERVING')
    tablet_62044.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl([
        'ReparentShard', '-force', 'test_keyspace/0',
        tablet_62344.tablet_alias
    ])

    # make sure the 'spare' slave goes to 'replica'
    timeout = 10
    while True:
      ti = utils.run_vtctl_json(['GetTablet', tablet_62044.tablet_alias])
      if ti['Type'] == "replica":
        logging.info("Slave tablet went to replica, good")
        break
      timeout = utils.wait_step('slave tablet going to replica', timeout)

    # make sure the master is still master
    ti = utils.run_vtctl_json(['GetTablet', tablet_62344.tablet_alias])
    self.assertEqual(ti['Type'], 'master',
                     "unexpected master type: %s" % ti['Type'])

    # stop replication on the slave, see it trigger the slave going
    # slightly unhealthy
    tablet_62044.mquery('', 'stop slave')
    timeout = 10
    while True:
      ti = utils.run_vtctl_json(['GetTablet', tablet_62044.tablet_alias])
      if 'Health' in ti and ti['Health']:
        if 'replication_lag' in ti['Health']:
          if ti['Health']['replication_lag'] == 'high':
            logging.info(
                "Slave tablet replication_lag went to high, good")
            break
      timeout = utils.wait_step('slave has high replication lag', timeout)

    # make sure the serving graph was updated
    ep = utils.run_vtctl_json(
        ['GetEndPoints', 'test_nj', 'test_keyspace/0', 'replica'])
    if not ep['entries'][0]['health']:
      self.fail(
          'Replication lag parameter not propagated to serving graph: %s'
          % str(ep))
    self.assertEqual(
        ep['entries'][0]['health']['replication_lag'], 'high',
        'Replication lag parameter not '
        'propagated to serving graph: %s' % str(ep))

    # make sure status web page is unhappy
    self.assertIn('>unhappy</span></div>', tablet_62044.get_status())

    # make sure the vars is updated
    v = utils.get_vars(tablet_62044.port)
    self.assertEqual(v['LastHealthMapCount'], 1)

    # then restart replication, make sure we go back to healthy
    tablet_62044.mquery('', 'start slave')
    timeout = 10
    while True:
      ti = utils.run_vtctl_json(['GetTablet', tablet_62044.tablet_alias])
      if 'Health' in ti and ti['Health']:
        if 'replication_lag' in ti['Health']:
          if ti['Health']['replication_lag'] == 'high':
            # still lagging: keep waiting
            timeout = utils.wait_step(
                'slave has no replication lag', timeout)
            continue
      logging.info("Slave tablet replication_lag is gone, good")
      break

    # make sure status web page is healthy
    self.assertIn('>healthy</span></div>', tablet_62044.get_status())

    # make sure the vars is updated
    v = utils.get_vars(tablet_62044.port)
    self.assertEqual(v['LastHealthMapCount'], 0)

    # kill the tablets
    tablet.kill_tablets([tablet_62344, tablet_62044])

    # the replica was in lameduck for 5 seconds, should have been enough
    # to reset its state to spare
    ti = utils.run_vtctl_json(['GetTablet', tablet_62044.tablet_alias])
    self.assertEqual(
        ti['Type'], 'spare',
        "tablet didn't go to spare while in lameduck mode: %s" % str(ti))
def _test_sanity(self):
    """Brings up a single master tablet and exercises basic vtctl actions:
    queries, dba fetches, pings, read-only toggling, validation, scrapping.
    """
    # Build the keyspace/shard and declare the master tablet.
    utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
    utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
    tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
    utils.validate_topology()
    srv_shard = utils.run_vtctl_json(
        ['GetSrvShard', 'test_nj', 'test_keyspace/0'])
    self.assertEqual(srv_shard['MasterCell'], 'test_nj')

    # if these statements don't run before the tablet it will wedge waiting for the
    # db to become accessible. this is more a bug than a feature.
    tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
                          self._populate_vt_select_test)

    tablet_62344.start_vttablet()

    # The query service must be reachable right after startup.
    query_output, _ = utils.run_vtctl(
        ['Query', 'test_nj', 'test_keyspace', 'select * from vt_select_test'],
        trap_output=True)
    output_lines = query_output.splitlines()
    self.assertEqual(
        len(output_lines), 5,
        "expected 5 rows in vt_select_test: %s %s" % (str(output_lines),
                                                      query_output))

    # Direct dba fetches must work as well.
    fetch_result = utils.run_vtctl_json([
        'ExecuteFetch', '-want_fields', tablet_62344.tablet_alias,
        'select * from vt_test_keyspace.vt_select_test'
    ])
    self.assertEqual(
        len(fetch_result['Rows']), 4,
        "expected 4 rows in vt_select_test: %s" % str(fetch_result))
    self.assertEqual(
        len(fetch_result['Fields']), 2,
        "expected 2 fields in vt_select_test: %s" % str(fetch_result))

    # Both ping variants must succeed.
    utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
    utils.run_vtctl(['RpcPing', tablet_62344.tablet_alias])

    # Exercise read-only / read-write toggles and master demotion.
    utils.run_vtctl(['SetReadOnly', tablet_62344.tablet_alias])
    utils.wait_db_read_only(62344)
    utils.run_vtctl(['SetReadWrite', tablet_62344.tablet_alias])
    utils.check_db_read_write(62344)
    utils.run_vtctl(['DemoteMaster', tablet_62344.tablet_alias])
    utils.wait_db_read_only(62344)

    utils.validate_topology()
    utils.run_vtctl('ValidateKeyspace test_keyspace')
    # not pinging tablets, as it enables replication checks, and they
    # break because we only have a single master, no slaves
    utils.run_vtctl('ValidateShard -ping-tablets=false test_keyspace/0')
    srv_shard = utils.run_vtctl_json(
        ['GetSrvShard', 'test_nj', 'test_keyspace/0'])
    self.assertEqual(srv_shard['MasterCell'], 'test_nj')

    # Tear down: stop the tablet, then recycle it through idle/scrap.
    tablet_62344.kill_vttablet()
    tablet_62344.init_tablet('idle')
    tablet_62344.scrap(force=True)
def test_resharding(self): utils.run_vtctl([ 'CreateKeyspace', '--sharding_column_name', 'bad_column', '--sharding_column_type', 'bytes', 'test_keyspace' ]) utils.run_vtctl([ 'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col', 'uint64' ], expect_fail=True) utils.run_vtctl([ 'SetKeyspaceShardingInfo', '-force', 'test_keyspace', 'custom_ksid_col', keyspace_id_type ]) shard_0_master.init_tablet('replica', 'test_keyspace', '-80') shard_0_replica.init_tablet('replica', 'test_keyspace', '-80') shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80') shard_1_master.init_tablet('replica', 'test_keyspace', '80-') shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-') shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-') shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-') shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-') utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) ks = utils.run_vtctl_json( ['GetSrvKeyspace', 'test_nj', 'test_keyspace']) self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col') # we set full_mycnf_args to True as a test in the KIT_BYTES case full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES # create databases so vttablet can start behaving somewhat normally for t in [ shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]: t.create_db('vt_test_keyspace') t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args) # wait for the tablets (replication is not setup, they won't be healthy) for t in [ shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]: t.wait_for_vttablet_state('NOT_SERVING') # reparent to make the tablets work utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/-80', shard_0_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'InitShardMaster', 
'-force', 'test_keyspace/80-', shard_1_master.tablet_alias ], auto_log=True) # check the shards shards = utils.run_vtctl_json( ['FindAllShardsInKeyspace', 'test_keyspace']) self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards)) self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards)) self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards)) # create the tables self._create_schema() self._insert_startup_values() # run a health check on source replicas so they respond to discovery # (for binlog players) and on the source rdonlys (for workers) for t in [shard_0_replica, shard_1_slave1]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) # create the split shards shard_2_master.init_tablet('replica', 'test_keyspace', '80-c0') shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0') shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0') shard_3_master.init_tablet('replica', 'test_keyspace', 'c0-') shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-') shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-') # start vttablet on the split shards (no db created, # so they're all not serving) shard_2_master.start_vttablet(wait_for_state=None) shard_3_master.start_vttablet(wait_for_state=None) for t in [ shard_2_replica1, shard_2_replica2, shard_3_replica, shard_3_rdonly1 ]: t.start_vttablet(wait_for_state=None) for t in [ shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_master, shard_3_replica, shard_3_rdonly1 ]: t.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/80-c0', shard_2_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/c0-', shard_3_master.tablet_alias ], auto_log=True) # check the shards shards = utils.run_vtctl_json( ['FindAllShardsInKeyspace', 'test_keyspace']) for 
s in ['-80', '80-', '80-c0', 'c0-']: self.assertIn(s, shards, 'unexpected shards: %s' % str(shards)) self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards)) utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') # disable shard_1_slave2, so we're sure filtered replication will go # from shard_1_slave1 utils.run_vtctl( ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare']) shard_1_slave2.wait_for_vttablet_state('NOT_SERVING') # we need to create the schema, and the worker will do data copying for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'): utils.run_vtctl([ 'CopySchemaShard', '--exclude_tables', 'unrelated', shard_1_rdonly1.tablet_alias, keyspace_shard ], auto_log=True) # --max_tps is only specified to enable the throttler and ensure that the # code is executed. But the intent here is not to throttle the test, hence # the rate limit is set very high. 
utils.run_vtworker([ '--cell', 'test_nj', '--command_display_interval', '10ms', 'LegacySplitClone', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], auto_log=True) # TODO(alainjobart): experiment with the dontStartBinlogPlayer option # check the startup values are in the right place self._check_startup_values() # check the schema too utils.run_vtctl([ 'ValidateSchemaKeyspace', '--exclude_tables=unrelated', 'test_keyspace' ], auto_log=True) # check the binlog players are running and exporting vars self.check_destination_master(shard_2_master, ['test_keyspace/80-']) self.check_destination_master(shard_3_master, ['test_keyspace/80-']) # check that binlog server exported the stats vars self.check_binlog_server_vars(shard_1_slave1, horizontal=True) # Check that the throttler was enabled. self.check_throttler_service(shard_2_master.rpc_endpoint(), ['BinlogPlayer/0'], 9999) self.check_throttler_service(shard_3_master.rpc_endpoint(), ['BinlogPlayer/0'], 9999) # testing filtered replication: insert a bunch of data on shard 1, # check we get most of it after a few seconds, wait for binlog server # timeout, check we get all of it. logging.debug('Inserting lots of data on source shard') self._insert_lots(1000) logging.debug('Checking 80 percent of data is sent quickly') v = self._check_lots_timeout(1000, 80, 5) if v != 100: # small optimization: only do this check if we don't have all the data # already anyway. 
logging.debug('Checking all data goes through eventually') self._check_lots_timeout(1000, 100, 20) logging.debug('Checking no data was sent the wrong way') self._check_lots_not_present(1000) self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'], seconds_behind_master_max=30) self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'], seconds_behind_master_max=30) self.check_binlog_server_vars(shard_1_slave1, horizontal=True, min_statements=1000, min_transactions=1000) # use vtworker to compare the data (after health-checking the destination # rdonly tablets so discovery works) utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias]) logging.debug('Running vtworker SplitDiff') utils.run_vtworker([ '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-' ], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'], auto_log=True) utils.pause('Good time to test vtworker for diffs') # get status for destination master tablets, make sure we have it all self.check_running_binlog_player(shard_2_master, 4000, 2000) self.check_running_binlog_player(shard_3_master, 4000, 2000) # tests a failover switching serving to a different replica utils.run_vtctl( ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica']) utils.run_vtctl( ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare']) shard_1_slave2.wait_for_vttablet_state('SERVING') shard_1_slave1.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias]) # test data goes through again logging.debug('Inserting lots of data on source shard') self._insert_lots(1000, base=1000) logging.debug('Checking 80 percent of data was sent quickly') self._check_lots_timeout(1000, 80, 5, base=1000) self.check_binlog_server_vars(shard_1_slave2, horizontal=True, 
min_statements=800, min_transactions=800) # check we can't migrate the master just yet utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'], expect_fail=True) # check query service is off on master 2 and master 3, as filtered # replication is enabled. Even health check that is enabled on # master 3 should not interfere (we run it to be sure). utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias], auto_log=True) for master in [shard_2_master, shard_3_master]: utils.check_tablet_query_service(self, master, False, False) stream_health = utils.run_vtctl_json( ['VtTabletStreamHealth', '-count', '1', master.tablet_alias]) logging.debug('Got health: %s', str(stream_health)) self.assertIn('realtime_stats', stream_health) self.assertNotIn('serving', stream_health) # check the destination master 3 is healthy, even though its query # service is not running (if not healthy this would exception out) shard_3_master.get_healthz() # now serve rdonly from the split shards, in test_nj only utils.run_vtctl([ 'MigrateServedTypes', '--cells=test_nj', 'test_keyspace/80-', 'rdonly' ], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_srv_keyspace('test_ny', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False) utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False) utils.check_tablet_query_service(self, shard_1_rdonly1, False, True) # now serve rdonly from the split shards, everywhere utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 
'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_srv_keyspace('test_ny', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False) utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True) utils.check_tablet_query_service(self, shard_1_rdonly1, False, True) # then serve replica from the split shards destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-'] utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'], auto_log=True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-c0 c0-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_1_slave2, False, True) # move replica back and forth utils.run_vtctl( ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'], auto_log=True) # After a backwards migration, queryservice should be enabled on # source and disabled on destinations utils.check_tablet_query_service(self, shard_1_slave2, True, False) # Destination tablets would have query service disabled for other # reasons than the migration, so check the shard record instead of # the tablets directly. 
utils.check_shard_query_services(self, destination_shards, topodata_pb2.REPLICA, False) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'], auto_log=True) # After a forwards migration, queryservice should be disabled on # source and enabled on destinations utils.check_tablet_query_service(self, shard_1_slave2, False, True) # Destination tablets would have query service disabled for other # reasons than the migration, so check the shard record instead of # the tablets directly utils.check_shard_query_services(self, destination_shards, topodata_pb2.REPLICA, True) utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-c0 c0-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') # use vtworker to compare the data again logging.debug('Running vtworker SplitDiff') utils.run_vtworker([ '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-' ], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'], auto_log=True) # mock with the SourceShard records to test 'vtctl SourceShardDelete' # and 'vtctl SourceShardAdd' utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'], auto_log=True) utils.run_vtctl([ 'SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-', '0', 'test_keyspace/80-' ], auto_log=True) # then serve master from the split shards, make sure the source master's # query service is now turned off utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'], auto_log=True) 
utils.check_srv_keyspace('test_nj', 'test_keyspace', 'Partitions(master): -80 80-c0 c0-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-c0 c0-\n', keyspace_id_type=keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_1_master, False, True) # check the binlog players are gone now self.check_no_binlog_player(shard_2_master) self.check_no_binlog_player(shard_3_master) # delete the original tablets in the original shard tablet.kill_tablets([ shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]) for t in [ shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]: utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True) utils.run_vtctl( ['DeleteTablet', '-allow_master', shard_1_master.tablet_alias], auto_log=True) # rebuild the serving graph, all mentions of the old shards shoud be gone utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) # test RemoveShardCell utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True, expect_fail=True) utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True) utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True) shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-']) self.assertNotIn('cells', shard) # delete the original shard utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True) # kill everything tablet.kill_tablets([ shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_master, shard_3_replica, shard_3_rdonly1 ])
def test_restart_during_action(self):
    """Checks that a scheduled vtaction survives a vttablet restart.

    Schedules a long-running Sleep action plus a queued Ping, kills and
    restarts the vttablet while they are pending, then verifies
    WaitForAction semantics.  On zookeeper, also sanity-checks the
    exported connection-state vars.
    """
    # Start up a master mysql and vttablet.
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
    utils.validate_topology()
    srv_shard = utils.run_vtctl_json(
        ['GetSrvShard', 'test_nj', 'test_keyspace/0'])
    self.assertEqual(srv_shard['MasterCell'], 'test_nj')
    tablet_62344.create_db('vt_test_keyspace')
    tablet_62344.start_vttablet()

    utils.run_vtctl(['Ping', tablet_62344.tablet_alias])

    # Schedule a long action: the Ping below blocks behind the Sleep
    # unless we hit a scheduling race.
    utils.run_vtctl(['-no-wait', 'Sleep', tablet_62344.tablet_alias, '15s'],
                    stdout=utils.devnull)
    action_path, _ = utils.run_vtctl(
        ['-no-wait', 'Ping', tablet_62344.tablet_alias], trap_output=True)
    action_path = action_path.strip()

    # Kill the agent, leaving the vtaction running, then restart the agent.
    tablet_62344.kill_vttablet()
    tablet_62344.start_vttablet()

    # We expect this action with a short wait time to fail. This isn't the
    # best check and has some potential for flakiness.
    utils.run_vtctl(['-wait-time', '2s', 'WaitForAction', action_path],
                    expect_fail=True)

    # Wait until the background sleep action is done, otherwise there will
    # be a leftover vtaction whose result may overwrite running actions.
    # NOTE(alainjobart): Yes, I've seen it happen, it's a pain to debug:
    # the zombie Sleep clobbers the Clone command in the following tests.
    utils.run_vtctl(['-wait-time', '20s', 'WaitForAction', action_path],
                    auto_log=True)

    if environment.topo_server_implementation == 'zookeeper':
        # Extra small test: we ran for a while, get the states we were in,
        # make sure they're accounted for properly.
        # First the query engine states.
        v = utils.get_vars(tablet_62344.port)
        # Lazy %-args: formatting only happens if debug logging is enabled.
        logging.debug('vars: %s', v)

        # Then the Zookeeper connections.
        if v['ZkMetaConn']['test_nj']['Current'] != 'Connected':
            self.fail('invalid zk test_nj state: %s' %
                      v['ZkMetaConn']['test_nj']['Current'])
        if v['ZkMetaConn']['global']['Current'] != 'Connected':
            self.fail('invalid zk global state: %s' %
                      v['ZkMetaConn']['global']['Current'])
        if v['ZkMetaConn']['test_nj']['DurationConnected'] < 10e9:
            # Bug fix: TestCase.fail() takes a single message argument; the
            # original passed the duration as a second positional argument,
            # which raises TypeError instead of reporting the failure.
            self.fail('not enough time in Connected state: %u' %
                      v['ZkMetaConn']['test_nj']['DurationConnected'])
        if v['TabletType'] != 'master':
            self.fail('TabletType not exported correctly')

    tablet_62344.kill_vttablet()
def test_srv_vschema(self):
    """Verifies the served SrvVSchema contains exactly the expected keyspaces."""
    srv_vschema = utils.run_vtctl_json(['GetSrvVSchema', 'test_nj'])
    keyspaces = srv_vschema['keyspaces']
    self.assertEqual(len(keyspaces), 2,
                     'wrong vschema: %s' % str(srv_vschema))
    for expected_keyspace in ('user', 'lookup'):
        self.assertIn(expected_keyspace, keyspaces)
def test_vtgate(self):
    """End-to-end smoke test of the vtgate python client against one shard.

    Exercises _execute, _execute_batch, _stream_execute, transactions
    (rollback and commit), DML error handling, and two interleaved
    connections (one of them driven from a background thread).
    """
    # Start up a master mysql and vttablet
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    utils.run_vtctl(['CreateShard', 'test_keyspace/0'])
    tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
    utils.validate_topology()
    srvShard = utils.run_vtctl_json(
        ['GetSrvShard', 'test_nj', 'test_keyspace/0'])
    self.assertEqual(srvShard['MasterCell'], 'test_nj')

    # if these statements don't run before the tablet it will wedge waiting for the
    # db to become accessible. this is more a bug than a feature.
    tablet_62344.mquery("", ["set global read_only = off"])
    tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
                          self._populate_vt_select_test)

    tablet_62344.start_vttablet()
    gate_proc, gate_port = utils.vtgate_start()

    conn = vtgate.connect("localhost:%s" % (gate_port), "master",
                          "test_keyspace", "0", 2.0)

    # _execute: the fixture populates 4 rows with 2 columns each.
    (result, count, lastrow, fields) = conn._execute(
        "select * from vt_select_test", {})
    self.assertEqual(count, 4, "want 4, got %d" % (count))
    self.assertEqual(len(fields), 2, "want 2, got %d" % (len(fields)))

    # _execute_batch: two bound queries in one round trip, one rowset each.
    queries = [
        "select * from vt_select_test where id = :id",
        "select * from vt_select_test where id = :id",
    ]
    bindvars = [
        {"id": 1},
        {"id": 2},
    ]
    rowsets = conn._execute_batch(queries, bindvars)
    self.assertEqual(rowsets[0][0][0][0], 1)
    self.assertEqual(rowsets[1][0][0][0], 2)

    # _stream_execute: drain the stream until _stream_next returns falsy.
    (result, count, lastrow, fields) = conn._stream_execute(
        "select * from vt_select_test", {})
    self.assertEqual(len(fields), 2, "want 2, got %d" % (len(fields)))
    count = 0
    while 1:
        r = conn._stream_next()
        if not r:
            break
        count += 1
    self.assertEqual(count, 4, "want 4, got %d" % (count))

    # begin-rollback: the insert must not be visible afterwards.
    conn.begin()
    conn._execute("insert into vt_select_test values(:id, :msg)",
                  {"id": 5, "msg": "test4"})
    conn.rollback()
    (result, count, lastrow, fields) = conn._execute(
        "select * from vt_select_test", {})
    self.assertEqual(count, 4, "want 4, got %d" % (count))

    # begin-commit: the same insert must now stick.
    conn.begin()
    conn._execute("insert into vt_select_test values(:id, :msg)",
                  {"id": 5, "msg": "test4"})
    conn.commit()
    (result, count, lastrow, fields) = conn._execute(
        "select * from vt_select_test", {})
    self.assertEqual(count, 5, "want 5, got %d" % (count))

    # error on dml. We still need to get a transaction id
    conn.begin()
    with self.assertRaises(dbexceptions.IntegrityError):
        conn._execute("insert into vt_select_test values(:id, :msg)",
                      {"id": 5, "msg": "test4"})
    self.assertTrue(conn.session["ShardSessions"][0]["TransactionId"] != 0)
    conn.commit()

    # interleaving: a second connection queries from a background thread
    # while this one mixes plain and streaming reads.
    conn2 = vtgate.connect("localhost:%s" % (gate_port), "master",
                           "test_keyspace", "0", 2.0)
    thd = threading.Thread(target=self._query_lots, args=(conn2, ))
    thd.start()
    for i in xrange(250):
        (result, count, lastrow, fields) = conn._execute(
            "select id from vt_select_test where id = 2", {})
        self.assertEqual(result, [(2, )])
        if i % 10 == 0:
            # Every 10th iteration, interleave a fully-drained stream.
            conn._stream_execute(
                "select id from vt_select_test where id = 3", {})
            while 1:
                result = conn._stream_next()
                if not result:
                    break
                self.assertEqual(result, (3, ))
    thd.join()

    # close
    conn.close()

    utils.vtgate_kill(gate_proc)
    tablet_62344.kill_vttablet()
def test_no_mysql_healthcheck(self):
    """Starts a vttablet with no mysql port while mysql is down.

    Verifies the vttablet starts properly but reports unhealthy; then
    brings mysql back (restarting replication on the slave) and verifies
    the tablets become healthy and re-discover their mysql port.
    """
    all_tablets = (tablet_62344, tablet_62044)

    # Replication must be configured, so the slave tablet can get healthy.
    for t in all_tablets:
        t.create_db('vt_test_keyspace')
    master_pos = mysql_flavor().master_position(tablet_62344)
    # Use 'localhost' as hostname because Travis CI worker hostnames
    # are too long for MySQL replication.
    change_master_cmds = mysql_flavor().change_master_commands(
        'localhost', tablet_62344.mysql_port, master_pos)
    tablet_62044.mquery(
        '',
        ['RESET MASTER', 'RESET SLAVE'] + change_master_cmds +
        ['START SLAVE'])

    # Take down every mysqld.
    utils.wait_procs([t.shutdown_mysql() for t in all_tablets])

    # Start the tablets; with mysqld gone they must end up NOT_SERVING.
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('spare', 'test_keyspace', '0',
                             include_mysql_port=False)
    for t in all_tablets:
        t.start_vttablet(wait_for_state=None,
                         target_tablet_type='replica',
                         full_mycnf_args=True,
                         include_mysql_port=False)
    for t in all_tablets:
        t.wait_for_vttablet_state('NOT_SERVING')
        self.check_healthz(t, False)

    # Bring mysqld back.
    utils.wait_procs([t.start_mysql() for t in all_tablets])

    # The master should still be healthy.
    utils.run_vtctl(
        ['RunHealthCheck', tablet_62344.tablet_alias, 'replica'],
        auto_log=True)
    self.check_healthz(tablet_62344, True)

    # The slave won't be healthy at first, as replication is not running.
    utils.run_vtctl(
        ['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
        auto_log=True)
    self.check_healthz(tablet_62044, False)
    tablet_62044.wait_for_vttablet_state('NOT_SERVING')

    # Restart replication; the tablet should then become healthy and fix
    # its mysql port.
    tablet_62044.mquery('', ['START SLAVE'])
    utils.run_vtctl(
        ['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
        auto_log=True)
    tablet_62044.wait_for_vttablet_state('SERVING')
    self.check_healthz(tablet_62044, True)

    for t in all_tablets:
        # Wait for the mysql port to show up in the tablet record.
        remaining = 10
        while True:
            tablet_info = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
            if 'mysql' in tablet_info['port_map']:
                break
            remaining = utils.wait_step('mysql port in tablet record',
                                        remaining)
        self.assertEqual(tablet_info['port_map']['mysql'], t.mysql_port)

    # All done.
    tablet.kill_tablets([tablet_62344, tablet_62044])
def test_resharding(self): # we're going to reparent and swap these two global shard_2_master, shard_2_replica1 utils.run_vtctl([ 'CreateKeyspace', '--sharding_column_name', 'bad_column', '--sharding_column_type', 'bytes', 'test_keyspace' ]) utils.run_vtctl([ 'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col', 'uint64' ], expect_fail=True) utils.run_vtctl([ 'SetKeyspaceShardingInfo', '-force', 'test_keyspace', 'custom_ksid_col', base_sharding.keyspace_id_type ]) shard_0_master.init_tablet('replica', 'test_keyspace', '-80') shard_0_replica.init_tablet('replica', 'test_keyspace', '-80') shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80') shard_1_master.init_tablet('replica', 'test_keyspace', '80-') shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-') shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-') shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-') shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-') utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) ks = utils.run_vtctl_json( ['GetSrvKeyspace', 'test_nj', 'test_keyspace']) self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col') # we set full_mycnf_args to True as a test in the KIT_BYTES case full_mycnf_args = ( base_sharding.keyspace_id_type == keyrange_constants.KIT_BYTES) # create databases so vttablet can start behaving somewhat normally for t in [ shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]: t.create_db('vt_test_keyspace') t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args) # wait for the tablets (replication is not setup, they won't be healthy) for t in [ shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]: t.wait_for_vttablet_state('NOT_SERVING') # reparent to make the tablets work utils.run_vtctl([ 'InitShardMaster', 
'-force', 'test_keyspace/-80', shard_0_master.tablet_alias ], auto_log=True) utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/80-', shard_1_master.tablet_alias ], auto_log=True) # check the shards shards = utils.run_vtctl_json( ['FindAllShardsInKeyspace', 'test_keyspace']) self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards)) self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards)) self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards)) # create the tables self._create_schema() self._insert_startup_values() # run a health check on source replicas so they respond to discovery # (for binlog players) and on the source rdonlys (for workers) for t in [shard_0_replica, shard_1_slave1]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]: utils.run_vtctl(['RunHealthCheck', t.tablet_alias]) # create the split shards shard_2_master.init_tablet('replica', 'test_keyspace', '80-c0') shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0') shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0') shard_2_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-c0') shard_3_master.init_tablet('replica', 'test_keyspace', 'c0-') shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-') shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-') # start vttablet on the split shards (no db created, # so they're all not serving) shard_2_master.start_vttablet(wait_for_state=None) shard_3_master.start_vttablet(wait_for_state=None) for t in [ shard_2_replica1, shard_2_replica2, shard_2_rdonly1, shard_3_replica, shard_3_rdonly1 ]: t.start_vttablet(wait_for_state=None) for t in [ shard_2_master, shard_2_replica1, shard_2_replica2, shard_2_rdonly1, shard_3_master, shard_3_replica, shard_3_rdonly1 ]: t.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/80-c0', shard_2_master.tablet_alias ], auto_log=True) 
utils.run_vtctl([ 'InitShardMaster', '-force', 'test_keyspace/c0-', shard_3_master.tablet_alias ], auto_log=True) # check the shards shards = utils.run_vtctl_json( ['FindAllShardsInKeyspace', 'test_keyspace']) for s in ['-80', '80-', '80-c0', 'c0-']: self.assertIn(s, shards, 'unexpected shards: %s' % str(shards)) self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards)) utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # disable shard_1_slave2, so we're sure filtered replication will go # from shard_1_slave1 utils.run_vtctl( ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare']) shard_1_slave2.wait_for_vttablet_state('NOT_SERVING') # we need to create the schema, and the worker will do data copying for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'): utils.run_vtctl([ 'CopySchemaShard', '--exclude_tables', 'unrelated', shard_1_rdonly1.tablet_alias, keyspace_shard ], auto_log=True) # Run vtworker as daemon for the following SplitClone commands. worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg( ['--cell', 'test_nj', '--command_display_interval', '10ms'], auto_log=True) # Copy the data from the source to the destination shards. # --max_tps is only specified to enable the throttler and ensure that the # code is executed. But the intent here is not to throttle the test, hence # the rate limit is set very high. # # Initial clone (online). 
workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--offline=false', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 2, 0, 0, 0) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Test the correct handling of keyspace_id changes which happen after # the first clone. # Let row 2 go to shard 3 instead of shard 2. shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set' ' custom_ksid_col=0xD000000000000000 WHERE id=2', write=True) workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--offline=false', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Row 2 will be deleted from shard 2 and inserted to shard 3. self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 1, 0, 1, 1) self._check_value(shard_2_master, 'resharding1', 2, 'msg2', 0xD000000000000000, should_be_here=False) self._check_value(shard_3_master, 'resharding1', 2, 'msg2', 0xD000000000000000) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Move row 2 back to shard 2 from shard 3 by changing the keyspace_id again. 
shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set' ' custom_ksid_col=0x9000000000000000 WHERE id=2', write=True) workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--offline=false', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Row 2 will be deleted from shard 3 and inserted to shard 2. self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 1, 0, 1, 1) self._check_value(shard_2_master, 'resharding1', 2, 'msg2', 0x9000000000000000) self._check_value(shard_3_master, 'resharding1', 2, 'msg2', 0x9000000000000000, should_be_here=False) # Reset vtworker such that we can run the next command. workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Modify the destination shard. SplitClone will revert the changes. # Delete row 2 (provokes an insert). shard_2_master.mquery('vt_test_keyspace', 'delete from resharding1 where id=2', write=True) # Update row 3 (provokes an update). shard_3_master.mquery( 'vt_test_keyspace', "update resharding1 set msg='msg-not-3' where id=3", write=True) # Insert row 4 and 5 (provokes a delete). self._insert_value(shard_3_master, 'resharding1', 4, 'msg4', 0xD000000000000000) self._insert_value(shard_3_master, 'resharding1', 5, 'msg5', 0xD000000000000000) workerclient_proc = utils.run_vtworker_client_bg([ 'SplitClone', '--exclude_tables', 'unrelated', '--chunk_count', '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999', 'test_keyspace/80-' ], worker_rpc_port) utils.wait_procs([workerclient_proc]) # Change tablet, which was taken offline, back to rdonly. 
utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], auto_log=True) self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1', 1, 1, 2, 0) self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1', 0, 0, 0, 2) # Terminate worker daemon because it is no longer needed. utils.kill_sub_process(worker_proc, soft=True) # TODO(alainjobart): experiment with the dontStartBinlogPlayer option # check the startup values are in the right place self._check_startup_values() # check the schema too utils.run_vtctl([ 'ValidateSchemaKeyspace', '--exclude_tables=unrelated', 'test_keyspace' ], auto_log=True) # check the binlog players are running and exporting vars self.check_destination_master(shard_2_master, ['test_keyspace/80-']) self.check_destination_master(shard_3_master, ['test_keyspace/80-']) # When the binlog players/filtered replication is turned on, the query # service must be turned off on the destination masters. # The tested behavior is a safeguard to prevent that somebody can # accidentally modify data on the destination masters while they are not # migrated yet and the source shards are still the source of truth. shard_2_master.wait_for_vttablet_state('NOT_SERVING') shard_3_master.wait_for_vttablet_state('NOT_SERVING') # check that binlog server exported the stats vars self.check_binlog_server_vars(shard_1_slave1, horizontal=True) # Check that the throttler was enabled. self.check_throttler_service(shard_2_master.rpc_endpoint(), ['BinlogPlayer/0'], 9999) self.check_throttler_service(shard_3_master.rpc_endpoint(), ['BinlogPlayer/0'], 9999) # testing filtered replication: insert a bunch of data on shard 1, # check we get most of it after a few seconds, wait for binlog server # timeout, check we get all of it. 
logging.debug('Inserting lots of data on source shard') self._insert_lots(1000) logging.debug('Executing MultiValue Insert Queries') self._exec_multi_shard_dmls() logging.debug('Checking 80 percent of data is sent quickly') v = self._check_lots_timeout(1000, 80, 5) if v != 100: # small optimization: only do this check if we don't have all the data # already anyway. logging.debug('Checking all data goes through eventually') self._check_lots_timeout(1000, 100, 20) logging.debug('Checking no data was sent the wrong way') self._check_lots_not_present(1000) logging.debug('Checking MultiValue Insert Queries') self._check_multi_shard_values() self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'], seconds_behind_master_max=30) self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'], seconds_behind_master_max=30) self.check_binlog_server_vars(shard_1_slave1, horizontal=True, min_statements=1000, min_transactions=1000) # use vtworker to compare the data (after health-checking the destination # rdonly tablets so discovery works) utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias]) logging.debug('Running vtworker SplitDiff') utils.run_vtworker([ '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-' ], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'], auto_log=True) utils.pause('Good time to test vtworker for diffs') # get status for destination master tablets, make sure we have it all if base_sharding.use_rbr: # We submitted non-annotated DMLs, that are properly routed # with RBR, but not with SBR. So the first shard counts # are smaller. In the second shard, we submitted statements # that affect more than one keyspace id. These will result # in two queries with RBR. So the count there is higher. 
self.check_running_binlog_player(shard_2_master, 4018, 2008) self.check_running_binlog_player(shard_3_master, 4028, 2008) else: self.check_running_binlog_player(shard_2_master, 4022, 2008) self.check_running_binlog_player(shard_3_master, 4024, 2008) # start a thread to insert data into shard_1 in the background # with current time, and monitor the delay insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 1, 10000, 0x9000000000000000) insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 2, 10001, 0xD000000000000000) monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low', 1) monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high', 2) # tests a failover switching serving to a different replica utils.run_vtctl( ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica']) utils.run_vtctl( ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare']) shard_1_slave2.wait_for_vttablet_state('SERVING') shard_1_slave1.wait_for_vttablet_state('NOT_SERVING') utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias]) # test data goes through again logging.debug('Inserting lots of data on source shard') self._insert_lots(1000, base=1000) logging.debug('Checking 80 percent of data was sent quickly') self._check_lots_timeout(1000, 80, 5, base=1000) self.check_binlog_server_vars(shard_1_slave2, horizontal=True, min_statements=800, min_transactions=800) # check we can't migrate the master just yet utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'], expect_fail=True) # check query service is off on master 2 and master 3, as filtered # replication is enabled. Even health check that is enabled on # master 3 should not interfere (we run it to be sure). 
utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias], auto_log=True) for master in [shard_2_master, shard_3_master]: utils.check_tablet_query_service(self, master, False, False) stream_health = utils.run_vtctl_json( ['VtTabletStreamHealth', '-count', '1', master.tablet_alias]) logging.debug('Got health: %s', str(stream_health)) self.assertIn('realtime_stats', stream_health) self.assertNotIn('serving', stream_health) # check the destination master 3 is healthy, even though its query # service is not running (if not healthy this would exception out) shard_3_master.get_healthz() # now serve rdonly from the split shards, in test_nj only utils.run_vtctl([ 'MigrateServedTypes', '--cells=test_nj', 'test_keyspace/80-', 'rdonly' ], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_srv_keyspace( 'test_ny', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False) utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False) utils.check_tablet_query_service(self, shard_1_rdonly1, False, True) # now serve rdonly from the split shards, everywhere utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_srv_keyspace( 'test_ny', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 
80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False) utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True) utils.check_tablet_query_service(self, shard_1_rdonly1, False, True) # then serve replica from the split shards destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-'] utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-c0 c0-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_1_slave2, False, True) # move replica back and forth utils.run_vtctl( ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'], auto_log=True) # After a backwards migration, queryservice should be enabled on # source and disabled on destinations utils.check_tablet_query_service(self, shard_1_slave2, True, False) # Destination tablets would have query service disabled for other # reasons than the migration, so check the shard record instead of # the tablets directly. 
utils.check_shard_query_services(self, destination_shards, topodata_pb2.REPLICA, False) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'], auto_log=True) # After a forwards migration, queryservice should be disabled on # source and enabled on destinations utils.check_tablet_query_service(self, shard_1_slave2, False, True) # Destination tablets would have query service disabled for other # reasons than the migration, so check the shard record instead of # the tablets directly utils.check_shard_query_services(self, destination_shards, topodata_pb2.REPLICA, True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-c0 c0-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') # reparent shard_2 to shard_2_replica1, then insert more data and # see it flow through still utils.run_vtctl([ 'PlannedReparentShard', '-keyspace_shard', 'test_keyspace/80-c0', '-new_master', shard_2_replica1.tablet_alias ]) # update our test variables to point at the new master shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master logging.debug( 'Inserting lots of data on source shard after reparenting') self._insert_lots(3000, base=2000) logging.debug('Checking 80 percent of data was sent fairly quickly') self._check_lots_timeout(3000, 80, 10, base=2000) # use vtworker to compare the data again logging.debug('Running vtworker SplitDiff') utils.run_vtworker([ '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated', '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-' ], auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'], 
auto_log=True) utils.run_vtctl( ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'], auto_log=True) # going to migrate the master now, check the delays monitor_thread_1.done = True monitor_thread_2.done = True insert_thread_1.done = True insert_thread_2.done = True logging.debug( 'DELAY 1: %s max_lag=%d ms avg_lag=%d ms', monitor_thread_1.thread_name, monitor_thread_1.max_lag_ms, monitor_thread_1.lag_sum_ms / monitor_thread_1.sample_count) logging.debug( 'DELAY 2: %s max_lag=%d ms avg_lag=%d ms', monitor_thread_2.thread_name, monitor_thread_2.max_lag_ms, monitor_thread_2.lag_sum_ms / monitor_thread_2.sample_count) # mock with the SourceShard records to test 'vtctl SourceShardDelete' # and 'vtctl SourceShardAdd' utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'], auto_log=True) utils.run_vtctl([ 'SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-', '0', 'test_keyspace/80-' ], auto_log=True) # then serve master from the split shards, make sure the source master's # query service is now turned off utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'], auto_log=True) utils.check_srv_keyspace( 'test_nj', 'test_keyspace', 'Partitions(master): -80 80-c0 c0-\n' 'Partitions(rdonly): -80 80-c0 c0-\n' 'Partitions(replica): -80 80-c0 c0-\n', keyspace_id_type=base_sharding.keyspace_id_type, sharding_column_name='custom_ksid_col') utils.check_tablet_query_service(self, shard_1_master, False, True) # check the binlog players are gone now self.check_no_binlog_player(shard_2_master) self.check_no_binlog_player(shard_3_master) # delete the original tablets in the original shard tablet.kill_tablets([ shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]) for t in [ shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1 ]: utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True) utils.run_vtctl( ['DeleteTablet', '-allow_master', shard_1_master.tablet_alias], auto_log=True) # rebuild the serving 
graph, all mentions of the old shards shoud be gone utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True) # test RemoveShardCell utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True, expect_fail=True) utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True) utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True) shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-']) self.assertNotIn('cells', shard) # delete the original shard utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True) # make sure we can't delete the destination shard now that it's serving _, stderr = utils.run_vtctl(['DeleteShard', 'test_keyspace/80-c0'], expect_fail=True) self.assertIn('is still serving, cannot delete it', stderr) # kill everything tablet.kill_tablets([ shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_2_master, shard_2_replica1, shard_2_replica2, shard_2_rdonly1, shard_3_master, shard_3_replica, shard_3_rdonly1 ])
def test_vertical_split(self):
  """End-to-end vertical split driven entirely by vtworker.

  Copies the schema for tables 'moving.*' and 'view1' to the destination
  keyspace, clones the data with VerticalSplitClone, verifies filtered
  replication keeps the destination in sync, then migrates served types
  (rdonly, replica, master) over — including a backward/forward replica
  migration — checking blacklisted tables and client redirection at each
  step.
  """
  # the worker will do everything. We test with source_reader_count=10
  # (down from default=20) as connection pool is not big enough for 20.
  # min_table_size_for_split is set to 1 as to force a split even on the
  # small table we have.
  utils.run_vtctl([
      'CopySchemaShard', '--tables', 'moving.*,view1',
      source_rdonly1.tablet_alias, 'destination_keyspace/0'
  ], auto_log=True)
  utils.run_vtworker([
      '--cell', 'test_nj', '--command_display_interval', '10ms',
      'VerticalSplitClone', '--tables', 'moving.*,view1',
      '--source_reader_count', '10', '--min_table_size_for_split', '1',
      'destination_keyspace/0'
  ], auto_log=True)
  # One of the two source rdonly tablets went spare after the clone.
  # Force a healthcheck on both to get them back to "rdonly".
  for t in [source_rdonly1, source_rdonly2]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])

  # check values are present
  self._check_values(destination_master, 'vt_destination_keyspace',
                     'moving1', self.moving1_first, 100)
  self._check_values(destination_master, 'vt_destination_keyspace',
                     'moving2', self.moving2_first, 100)
  # view1 mirrors moving1, so it shares moving1's first id
  self._check_values(destination_master, 'vt_destination_keyspace',
                     'view1', self.moving1_first, 100)

  # check the binlog player is running
  destination_master.wait_for_binlog_player_count(1)

  # add values to source, make sure they're replicated
  # ('staying1' is not in the split table list, so it is inserted but
  # never checked on the destination)
  moving1_first_add1 = self._insert_values('moving1', 100)
  _ = self._insert_values('staying1', 100)
  moving2_first_add1 = self._insert_values('moving2', 100)
  self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                             'moving1', moving1_first_add1, 100)
  self._check_values_timeout(destination_master, 'vt_destination_keyspace',
                             'moving2', moving2_first_add1, 100)

  # use vtworker to compare the data
  for t in [destination_rdonly1, destination_rdonly2]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])
  logging.debug('Running vtworker VerticalSplitDiff')
  utils.run_vtworker([
      '-cell', 'test_nj', 'VerticalSplitDiff', 'destination_keyspace/0'
  ], auto_log=True)
  # One of each source and dest rdonly tablet went spare after the diff.
  # Force a healthcheck on all four to get them back to "rdonly".
  for t in [
      source_rdonly1, source_rdonly2, destination_rdonly1,
      destination_rdonly2
  ]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])

  utils.pause('Good time to test vtworker for diffs')

  # get status for destination master tablet, make sure we have it all
  destination_master_status = destination_master.get_status()
  self.assertIn('Binlog player state: Running', destination_master_status)
  self.assertIn('moving.*', destination_master_status)
  self.assertIn(
      '<td><b>All</b>: 1000<br><b>Query</b>: 700<br>'
      '<b>Transaction</b>: 300<br></td>', destination_master_status)
  self.assertIn('</html>', destination_master_status)

  # check query service is off on destination master, as filtered
  # replication is enabled. Even health check should not interfere.
  destination_master_vars = utils.get_vars(destination_master.port)
  self.assertEqual(destination_master_vars['TabletStateName'],
                   'NOT_SERVING')

  # check we can't migrate the master just yet
  utils.run_vtctl(
      ['MigrateServedFrom', 'destination_keyspace/0', 'master'],
      expect_fail=True)

  # migrate rdonly only in test_ny cell, make sure nothing is migrated
  # in test_nj
  utils.run_vtctl([
      'MigrateServedFrom', '--cells=test_ny', 'destination_keyspace/0',
      'rdonly'
  ], auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                           'ServedFrom(rdonly): source_keyspace\n'
                           'ServedFrom(replica): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, None)
  self._check_blacklisted_tables(source_rdonly1, None)
  self._check_blacklisted_tables(source_rdonly2, None)

  # migrate test_nj only, using command line manual fix command,
  # and restore it back.
  keyspace_json = utils.run_vtctl_json(
      ['GetKeyspace', 'destination_keyspace'])
  found = False
  for ksf in keyspace_json['served_froms']:
    if ksf['tablet_type'] == topodata_pb2.RDONLY:
      found = True
      # after the partial migration, only test_nj should still be served
      # from the source keyspace
      self.assertEqual(ksf['cells'], ['test_nj'])
  self.assertTrue(found)
  utils.run_vtctl([
      'SetKeyspaceServedFrom', '-source=source_keyspace', '-remove',
      '-cells=test_nj', 'destination_keyspace', 'rdonly'
  ], auto_log=True)
  keyspace_json = utils.run_vtctl_json(
      ['GetKeyspace', 'destination_keyspace'])
  found = False
  for ksf in keyspace_json['served_froms']:
    if ksf['tablet_type'] == topodata_pb2.RDONLY:
      found = True
  # the rdonly ServedFrom entry must be gone after the -remove
  self.assertFalse(found)
  utils.run_vtctl([
      'SetKeyspaceServedFrom', '-source=source_keyspace',
      'destination_keyspace', 'rdonly'
  ], auto_log=True)
  keyspace_json = utils.run_vtctl_json(
      ['GetKeyspace', 'destination_keyspace'])
  found = False
  for ksf in keyspace_json['served_froms']:
    if ksf['tablet_type'] == topodata_pb2.RDONLY:
      found = True
      # restored entry applies to all cells, so no 'cells' field
      self.assertNotIn('cells', ksf)
  self.assertTrue(found)

  # now serve rdonly from the destination shards
  utils.run_vtctl(
      ['MigrateServedFrom', 'destination_keyspace/0', 'rdonly'],
      auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                           'ServedFrom(replica): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, None)
  self._check_blacklisted_tables(source_rdonly1, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['moving.*', 'view1'])
  self._check_client_conn_redirection('destination_keyspace',
                                      ['master', 'replica'],
                                      ['moving1', 'moving2'])

  # then serve replica from the destination shards
  utils.run_vtctl(
      ['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
      auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly1, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['moving.*', 'view1'])
  self._check_client_conn_redirection('destination_keyspace', ['master'],
                                      ['moving1', 'moving2'])

  # move replica back and forth
  utils.run_vtctl([
      'MigrateServedFrom', '-reverse', 'destination_keyspace/0', 'replica'
  ], auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n'
                           'ServedFrom(replica): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, None)
  self._check_blacklisted_tables(source_rdonly1, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['moving.*', 'view1'])
  utils.run_vtctl(
      ['MigrateServedFrom', 'destination_keyspace/0', 'replica'],
      auto_log=True)
  self._check_srv_keyspace('ServedFrom(master): source_keyspace\n')
  self._check_blacklisted_tables(source_master, None)
  self._check_blacklisted_tables(source_replica, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly1, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['moving.*', 'view1'])
  self._check_client_conn_redirection('destination_keyspace', ['master'],
                                      ['moving1', 'moving2'])

  # then serve master from the destination shards
  utils.run_vtctl(
      ['MigrateServedFrom', 'destination_keyspace/0', 'master'],
      auto_log=True)
  self._check_srv_keyspace('')
  self._check_blacklisted_tables(source_master, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_replica, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly1, ['moving.*', 'view1'])
  self._check_blacklisted_tables(source_rdonly2, ['moving.*', 'view1'])

  # check the binlog player is gone now
  destination_master.wait_for_binlog_player_count(0)

  # check the stats are correct
  self._check_stats()

  self._verify_vtctl_set_shard_tablet_control()
def test_custom_end_to_end(self):
  """Runs through the common operations of a custom sharded keyspace.

  Tests creation with one shard, schema change, reading / writing
  data, adding one more shard, reading / writing data from both
  shards, applying schema changes again, and reading / writing data
  from both shards again. Finally exercises the SplitQuery API over
  both shards and verifies the aggregated results.
  """
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

  # start the first shard only for now
  # (tablets start as 'replica'; InitShardMaster promotes one below)
  shard_0_master.init_tablet('replica', keyspace='test_keyspace',
                             shard='0', tablet_index=0)
  shard_0_replica.init_tablet('replica', keyspace='test_keyspace',
                              shard='0', tablet_index=1)
  shard_0_rdonly.init_tablet('rdonly', keyspace='test_keyspace',
                             shard='0', tablet_index=2)
  for t in [shard_0_master, shard_0_replica, shard_0_rdonly]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None)
  for t in [shard_0_master, shard_0_replica, shard_0_rdonly]:
    t.wait_for_vttablet_state('NOT_SERVING')

  utils.run_vtctl([
      'InitShardMaster', '-force', 'test_keyspace/0',
      shard_0_master.tablet_alias
  ], auto_log=True)
  utils.wait_for_tablet_type(shard_0_replica.tablet_alias, 'replica')
  utils.wait_for_tablet_type(shard_0_rdonly.tablet_alias, 'rdonly')
  for t in [shard_0_master, shard_0_replica, shard_0_rdonly]:
    t.wait_for_vttablet_state('SERVING')

  self._check_shards_count_in_srv_keyspace(1)
  s = utils.run_vtctl_json(['GetShard', 'test_keyspace/0'])
  self.assertEqual(s['is_master_serving'], True)

  # create a table on shard 0
  sql = '''create table data(
id bigint auto_increment,
name varchar(64),
primary key (id)
) Engine=InnoDB'''
  utils.run_vtctl(['ApplySchema', '-sql=' + sql, 'test_keyspace'],
                  auto_log=True)

  # reload schema everywhere so the QueryService knows about the tables
  for t in [shard_0_master, shard_0_replica, shard_0_rdonly]:
    utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True)

  # create shard 1
  shard_1_master.init_tablet('replica', keyspace='test_keyspace',
                             shard='1', tablet_index=0)
  shard_1_replica.init_tablet('replica', keyspace='test_keyspace',
                              shard='1', tablet_index=1)
  shard_1_rdonly.init_tablet('rdonly', keyspace='test_keyspace',
                             shard='1', tablet_index=2)
  for t in [shard_1_master, shard_1_replica, shard_1_rdonly]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None)
  for t in [shard_1_master, shard_1_replica, shard_1_rdonly]:
    t.wait_for_vttablet_state('NOT_SERVING')
  s = utils.run_vtctl_json(['GetShard', 'test_keyspace/1'])
  self.assertEqual(s['is_master_serving'], True)

  utils.run_vtctl([
      'InitShardMaster', '-force', 'test_keyspace/1',
      shard_1_master.tablet_alias
  ], auto_log=True)
  utils.wait_for_tablet_type(shard_1_replica.tablet_alias, 'replica')
  utils.wait_for_tablet_type(shard_1_rdonly.tablet_alias, 'rdonly')
  for t in [shard_1_master, shard_1_replica, shard_1_rdonly]:
    t.wait_for_vttablet_state('SERVING')
  utils.run_vtctl([
      'CopySchemaShard', shard_0_rdonly.tablet_alias, 'test_keyspace/1'
  ], auto_log=True)

  # we need to rebuild SrvKeyspace here to account for the new shards.
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)
  self._check_shards_count_in_srv_keyspace(2)

  # must start vtgate after tablets are up, or else wait until 1min refresh
  utils.VtGate().start(tablets=[
      shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master,
      shard_1_replica, shard_1_rdonly
  ])
  utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.1.master', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.1.replica', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.1.rdonly', 1)

  # insert and check data on shard 0
  self._insert_data('0', 100, 10)
  self._check_data('0', 100, 10)

  # insert and check data on shard 1
  self._insert_data('1', 200, 10)
  self._check_data('1', 200, 10)

  # create a second table on all shards
  sql = '''create table data2(
id bigint auto_increment,
name varchar(64),
primary key (id)
) Engine=InnoDB'''
  utils.run_vtctl(['ApplySchema', '-sql=' + sql, 'test_keyspace'],
                  auto_log=True)

  # reload schema everywhere so the QueryService knows about the tables
  for t in all_tablets:
    utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True)

  # insert and read data on all shards
  self._insert_data('0', 300, 10, table='data2')
  self._insert_data('1', 400, 10, table='data2')
  self._check_data('0', 300, 10, table='data2')
  self._check_data('1', 400, 10, table='data2')

  # Now test SplitQuery API works (used in MapReduce usually, but bringing
  # up a full MR-capable cluster is too much for this test environment)
  sql = 'select id, name from data'
  s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
  self.assertEqual(len(s), 4)
  shard0count = 0
  shard1count = 0
  for q in s:
    if q['shard_part']['shards'][0] == '0':
      shard0count += 1
    if q['shard_part']['shards'][0] == '1':
      shard1count += 1
  # 4 splits over 2 shards: expect an even 2/2 distribution
  self.assertEqual(shard0count, 2)
  self.assertEqual(shard1count, 2)

  # run the queries, aggregate the results, make sure we have all rows
  rows = {}
  for q in s:
    bindvars = {}
    for name, value in q['query']['bind_variables'].iteritems():
      # vtctl encodes bytes as base64.
      bindvars[name] = int(base64.standard_b64decode(value['value']))
    qr = utils.vtgate.execute_shards(q['query']['sql'],
                                     'test_keyspace',
                                     ','.join(q['shard_part']['shards']),
                                     tablet_type='master',
                                     bindvars=bindvars)
    for r in qr['rows']:
      rows[int(r[0])] = r[1]
  self.assertEqual(len(rows), 20)
  expected = {}
  for i in xrange(10):
    expected[100 + i] = 'row %d' % (100 + i)
    expected[200 + i] = 'row %d' % (200 + i)
  self.assertEqual(rows, expected)
def test_merge_sharding(self):
  """Merges two source shards (-40, 40-80) into a single shard (-80).

  Brings up three source shards plus a destination shard, clones the
  data with the SplitClone vtworker (which starts filtered replication
  from both sources), verifies the data with SplitDiff runs against
  each source, then migrates served types (rdonly, replica, master) to
  the merged shard and tears down the original shards.
  """
  utils.run_vtctl([
      'CreateKeyspace', '--sharding_column_name', 'custom_sharding_key',
      '--sharding_column_type', keyspace_id_type, '--split_shard_count',
      '4', 'test_keyspace'
  ])
  shard_0_master.init_tablet('master', 'test_keyspace', '-40')
  shard_0_replica.init_tablet('replica', 'test_keyspace', '-40')
  shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40')
  shard_1_master.init_tablet('master', 'test_keyspace', '40-80')
  shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80')
  shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80')
  shard_2_master.init_tablet('master', 'test_keyspace', '80-')
  shard_2_replica.init_tablet('replica', 'test_keyspace', '80-')
  shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)
  ks = utils.run_vtctl_json(
      ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
  self.assertEqual(ks['split_shard_count'], 4)

  # create databases so vttablet can start behaving normally
  for t in [
      shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master,
      shard_1_replica, shard_1_rdonly, shard_2_master, shard_2_replica,
      shard_2_rdonly
  ]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None)
  for t in [
      shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master,
      shard_1_replica, shard_1_rdonly, shard_2_master, shard_2_replica,
      shard_2_rdonly
  ]:
    t.wait_for_vttablet_state('SERVING')

  # reparent to make the tablets work
  utils.run_vtctl([
      'InitShardMaster', 'test_keyspace/-40', shard_0_master.tablet_alias
  ], auto_log=True)
  utils.run_vtctl([
      'InitShardMaster', 'test_keyspace/40-80',
      shard_1_master.tablet_alias
  ], auto_log=True)
  utils.run_vtctl([
      'InitShardMaster', 'test_keyspace/80-', shard_2_master.tablet_alias
  ], auto_log=True)

  # create the tables
  self._create_schema()
  self._insert_startup_values()

  # run a health check on source replicas so they respond to discovery
  # (for binlog players) and on the source rdonlys (for workers)
  for t in [shard_0_replica, shard_1_replica]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'replica'])
  for t in [shard_0_rdonly, shard_1_rdonly]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])

  # create the merge shards
  shard_dest_master.init_tablet('master', 'test_keyspace', '-80')
  shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80')
  shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')

  # start vttablet on the split shards (no db created,
  # so they're all not serving)
  for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
    t.start_vttablet(wait_for_state=None)
  for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
    t.wait_for_vttablet_state('NOT_SERVING')

  utils.run_vtctl([
      'InitShardMaster', 'test_keyspace/-80',
      shard_dest_master.tablet_alias
  ], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -40 40-80 80-\n'
                           'Partitions(rdonly): -40 40-80 80-\n'
                           'Partitions(replica): -40 40-80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')

  # copy the schema
  utils.run_vtctl([
      'CopySchemaShard', shard_0_rdonly.tablet_alias, 'test_keyspace/-80'
  ], auto_log=True)

  # copy the data (will also start filtered replication), reset source
  utils.run_vtworker([
      '--cell', 'test_nj', '--command_display_interval', '10ms',
      'SplitClone', '--source_reader_count', '10',
      '--min_table_size_for_split', '1',
      '--min_healthy_rdonly_endpoints', '1', 'test_keyspace/-80'
  ], auto_log=True)
  # the worker turned the source rdonlys into 'spare'; restore them
  utils.run_vtctl(
      ['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)

  # check the startup values are in the right place
  self._check_startup_values()

  # check the schema too
  utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                  auto_log=True)

  # check binlog player variables
  self.check_destination_master(
      shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80'])

  # check that binlog server exported the stats vars
  self.check_binlog_server_vars(shard_0_replica, horizontal=True)
  self.check_binlog_server_vars(shard_1_replica, horizontal=True)

  # testing filtered replication: insert a bunch of data on shard 0 and 1,
  # check we get most of it after a few seconds, wait for binlog server
  # timeout, check we get all of it.
  logging.debug('Inserting lots of data on source shards')
  self._insert_lots(1000)
  logging.debug('Checking 80 percent of data is sent quickly')
  v = self._check_lots_timeout(1000, 80, 10)
  if v != 100:
    # small optimization: only do this check if we don't have all the data
    # already anyway.
    logging.debug('Checking all data goes through eventually')
    self._check_lots_timeout(1000, 100, 30)
  self.check_binlog_player_vars(
      shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80'],
      seconds_behind_master_max=30)
  self.check_binlog_server_vars(shard_0_replica, horizontal=True,
                                min_statements=1000,
                                min_transactions=1000)
  self.check_binlog_server_vars(shard_1_replica, horizontal=True,
                                min_statements=1000,
                                min_transactions=1000)

  # use vtworker to compare the data (after health-checking the destination
  # rdonly tablets so discovery works)
  utils.run_vtctl(
      ['RunHealthCheck', shard_dest_rdonly.tablet_alias, 'rdonly'])
  # with two sources, SplitDiff must be run once per source_uid
  logging.debug('Running vtworker SplitDiff on first half')
  utils.run_vtworker([
      '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
      '--min_healthy_rdonly_endpoints', '1', '--source_uid', '0',
      'test_keyspace/-80'
  ], auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  logging.debug('Running vtworker SplitDiff on second half')
  utils.run_vtworker([
      '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
      '--min_healthy_rdonly_endpoints', '1', '--source_uid', '1',
      'test_keyspace/-80'
  ], auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)

  # get status for the destination master tablet, make sure we have it all
  self.check_running_binlog_player(shard_dest_master, 3000, 1000)

  # check destination master query service is not running
  utils.check_tablet_query_service(self, shard_dest_master, False, False)
  stream_health = utils.run_vtctl_json([
      'VtTabletStreamHealth', '-count', '1',
      shard_dest_master.tablet_alias
  ])
  logging.debug('Got health: %s', str(stream_health))
  self.assertIn('realtime_stats', stream_health)
  self.assertNotIn('serving', stream_health)

  # check the destination master 3 is healthy, even though its query
  # service is not running (if not healthy this would exception out)
  shard_dest_master.get_healthz()

  # now serve rdonly from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -40 40-80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -40 40-80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')

  # now serve replica from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -40 40-80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')

  # now serve master from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')
  utils.check_tablet_query_service(self, shard_0_master, False, True)
  utils.check_tablet_query_service(self, shard_1_master, False, True)

  # check the binlog players are gone now
  self.check_no_binlog_player(shard_dest_master)

  # kill the original tablets in the original shards
  tablet.kill_tablets([
      shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master,
      shard_1_replica, shard_1_rdonly
  ])
  for t in [
      shard_0_replica, shard_0_rdonly, shard_1_replica, shard_1_rdonly
  ]:
    utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
  for t in [shard_0_master, shard_1_master]:
    utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                    auto_log=True)

  # delete the original shards
  utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True)
  utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True)

  # rebuild the serving graph, all mentions of the old shards should be gone
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)

  # kill everything else
  tablet.kill_tablets([
      shard_2_master, shard_2_replica, shard_2_rdonly, shard_dest_master,
      shard_dest_replica, shard_dest_rdonly
  ])
def _test_sanity(self):
  """Brings up a single master tablet and exercises basic vtctl commands.

  Covers tablet startup, direct queries (with and without field names),
  ExecuteFetchAsDba, Ping/RefreshState variants, read-only / read-write
  toggling, DemoteMaster, and keyspace/shard validation.
  """
  # Start up a master mysql and vttablet
  utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
  utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
  tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
  utils.validate_topology()

  # if these statements don't run before the tablet it will wedge
  # waiting for the db to become accessible. this is more a bug than
  # a feature.
  tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
                        self._populate_vt_select_test)

  tablet_62344.start_vttablet()

  # make sure the query service is started right away.
  qr = tablet_62344.execute('select id, msg from vt_select_test')
  self.assertEqual(len(qr['rows']), 4,
                   'expected 4 rows in vt_select_test: %s' % str(qr))
  self.assertEqual(qr['fields'][0]['name'], 'id')
  self.assertEqual(qr['fields'][1]['name'], 'msg')

  # test exclude_field_names to vttablet works as expected.
  qr = tablet_62344.execute('select id, msg from vt_select_test',
                            execute_options='exclude_field_names:true ')
  self.assertEqual(len(qr['rows']), 4,
                   'expected 4 rows in vt_select_test: %s' % str(qr))
  # with field names excluded, the 'name' key must be absent
  self.assertNotIn('name', qr['fields'][0])
  self.assertNotIn('name', qr['fields'][1])

  # make sure direct dba queries work
  query_result = utils.run_vtctl_json([
      'ExecuteFetchAsDba', '-json', tablet_62344.tablet_alias,
      'select * from vt_test_keyspace.vt_select_test'
  ])
  self.assertEqual(
      len(query_result['rows']), 4,
      'expected 4 rows in vt_select_test: %s' % str(query_result))
  self.assertEqual(
      len(query_result['fields']), 2,
      'expected 2 fields in vt_select_test: %s' % str(query_result))

  # check Ping / RefreshState / RefreshStateByShard
  utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
  utils.run_vtctl(['RefreshState', tablet_62344.tablet_alias])
  utils.run_vtctl(['RefreshStateByShard', 'test_keyspace/0'])
  utils.run_vtctl(
      ['RefreshStateByShard', '--cells=test_nj', 'test_keyspace/0'])

  # Quickly check basic actions.
  utils.run_vtctl(['SetReadOnly', tablet_62344.tablet_alias])
  utils.wait_db_read_only(62344)

  utils.run_vtctl(['SetReadWrite', tablet_62344.tablet_alias])
  utils.check_db_read_write(62344)

  # DemoteMaster leaves the db read-only again
  utils.run_vtctl(['DemoteMaster', tablet_62344.tablet_alias])
  utils.wait_db_read_only(62344)

  utils.validate_topology()
  utils.run_vtctl(['ValidateKeyspace', 'test_keyspace'])
  # not pinging tablets, as it enables replication checks, and they
  # break because we only have a single master, no slaves
  utils.run_vtctl(
      ['ValidateShard', '-ping-tablets=false', 'test_keyspace/0'])

  tablet_62344.kill_vttablet()
def test_merge_sharding(self):
  """End-to-end merge of shards -40 and 40-80 into one shard -80.

  Workflow: bring up three source shards (-40, 40-80, 80-), seed data,
  create the destination shard -80, clone with vtworker SplitClone
  (online then offline, verifying reconciliation counters), validate
  filtered replication catches up, diff with SplitDiff against both
  sources, then migrate rdonly/replica/master serving types to the
  merged shard and delete the originals.
  """
  utils.run_vtctl(['CreateKeyspace',
                   '--sharding_column_name', 'custom_ksid_col',
                   '--sharding_column_type', base_sharding.keyspace_id_type,
                   'test_keyspace'])

  # All tablets start as replica/rdonly; masters are elected below with
  # InitShardMaster -force.
  shard_0_master.init_tablet('replica', 'test_keyspace', '-40')
  shard_0_replica.init_tablet('replica', 'test_keyspace', '-40')
  shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40')
  shard_1_master.init_tablet('replica', 'test_keyspace', '40-80')
  shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80')
  shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80')
  shard_2_master.init_tablet('replica', 'test_keyspace', '80-')
  shard_2_replica.init_tablet('replica', 'test_keyspace', '80-')
  shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

  # rebuild and check SrvKeyspace
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
  ks = utils.run_vtctl_json(
      ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
  self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

  # create databases so vttablet can start behaving normally
  for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
            shard_1_master, shard_1_replica, shard_1_rdonly,
            shard_2_master, shard_2_replica, shard_2_rdonly]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None,
                     binlog_use_v3_resharding_mode=False)

  # won't be serving, no replication state
  for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
            shard_1_master, shard_1_replica, shard_1_rdonly,
            shard_2_master, shard_2_replica, shard_2_rdonly]:
    t.wait_for_vttablet_state('NOT_SERVING')

  # reparent to make the tablets work
  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-40',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/40-80',
                   shard_1_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                   shard_2_master.tablet_alias], auto_log=True)

  # create the tables
  self._create_schema()
  self._insert_startup_values()

  # run a health check on source replicas so they respond to discovery
  # (for binlog players) and on the source rdonlys (for workers)
  for t in [shard_0_replica, shard_1_replica]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
  for t in [shard_0_rdonly, shard_1_rdonly]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

  # create the merge shards
  shard_dest_master.init_tablet('replica', 'test_keyspace', '-80')
  shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80')
  shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')

  # start vttablet on the destination shard (no db created,
  # so they're all not serving)
  for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
    t.start_vttablet(wait_for_state=None,
                     binlog_use_v3_resharding_mode=False)
  for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
    t.wait_for_vttablet_state('NOT_SERVING')

  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                   shard_dest_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)
  utils.check_srv_keyspace(
      'test_nj', 'test_keyspace',
      'Partitions(master): -40 40-80 80-\n'
      'Partitions(rdonly): -40 40-80 80-\n'
      'Partitions(replica): -40 40-80 80-\n',
      keyspace_id_type=base_sharding.keyspace_id_type,
      sharding_column_name='custom_ksid_col')

  # copy the schema
  utils.run_vtctl(['CopySchemaShard', shard_0_rdonly.tablet_alias,
                   'test_keyspace/-80'], auto_log=True)

  # copy the data (will also start filtered replication), reset source
  # Run vtworker as daemon for the following SplitClone commands.
  worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
      ['--cell', 'test_nj', '--command_display_interval', '10ms',
       '--use_v3_resharding_mode=false'],
      auto_log=True)

  # Initial clone (online).
  workerclient_proc = utils.run_vtworker_client_bg(
      ['SplitClone',
       '--offline=false',
       '--chunk_count', '10',
       '--min_rows_per_chunk', '1',
       '--min_healthy_rdonly_tablets', '1',
       'test_keyspace/-80'],
      worker_rpc_port)
  utils.wait_procs([workerclient_proc])
  # Online clone: 2 inserts, no updates/deletes/equal rows yet.
  self.verify_reconciliation_counters(worker_port, 'Online',
                                      'resharding1', 2, 0, 0, 0)

  # Reset vtworker such that we can run the next command.
  workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                   worker_rpc_port)
  utils.wait_procs([workerclient_proc])

  # Modify the destination shard. SplitClone will revert the changes.
  # Delete row 1 (provokes an insert).
  shard_dest_master.mquery('vt_test_keyspace',
                           'delete from resharding1 where id=1',
                           write=True)
  # Update row 2 (provokes an update).
  shard_dest_master.mquery(
      'vt_test_keyspace',
      "update resharding1 set msg='msg-not-2' where id=2",
      write=True)
  # Insert row 0 (provokes a delete).
  self._insert_value(shard_dest_master, 'resharding1', 0, 'msg0',
                     0x5000000000000000)

  workerclient_proc = utils.run_vtworker_client_bg(
      ['SplitClone',
       '--chunk_count', '10',
       '--min_rows_per_chunk', '1',
       '--min_healthy_rdonly_tablets', '1',
       'test_keyspace/-80'],
      worker_rpc_port)
  utils.wait_procs([workerclient_proc])

  # Change tablets, which were taken offline, back to rdonly.
  utils.run_vtctl(
      ['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  # Second clone reconciled the three manual edits above (insert,
  # update, delete); offline pass saw 2 rows already equal.
  self.verify_reconciliation_counters(worker_port, 'Online',
                                      'resharding1', 1, 1, 1, 0)
  self.verify_reconciliation_counters(worker_port, 'Offline',
                                      'resharding1', 0, 0, 0, 2)

  # Terminate worker daemon because it is no longer needed.
  utils.kill_sub_process(worker_proc, soft=True)

  # check the startup values are in the right place
  self._check_startup_values()

  # check the schema too
  utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                  auto_log=True)

  # check binlog player variables
  self.check_destination_master(
      shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80'])

  # check that binlog server exported the stats vars
  self.check_binlog_server_vars(shard_0_replica, horizontal=True)
  self.check_binlog_server_vars(shard_1_replica, horizontal=True)

  # testing filtered replication: insert a bunch of data on shard 0 and 1,
  # check we get most of it after a few seconds, wait for binlog server
  # timeout, check we get all of it.
  logging.debug('Inserting lots of data on source shards')
  self._insert_lots(1000)
  logging.debug('Checking 80 percent of data is sent quickly')
  v = self._check_lots_timeout(1000, 80, 10)
  if v != 100:
    # small optimization: only do this check if we don't have all the data
    # already anyway.
    logging.debug('Checking all data goes through eventually')
    self._check_lots_timeout(1000, 100, 30)
  self.check_binlog_player_vars(
      shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80'],
      seconds_behind_master_max=30)
  self.check_binlog_server_vars(shard_0_replica, horizontal=True,
                                min_statements=1000,
                                min_transactions=1000)
  self.check_binlog_server_vars(shard_1_replica, horizontal=True,
                                min_statements=1000,
                                min_transactions=1000)

  # use vtworker to compare the data (after health-checking the destination
  # rdonly tablets so discovery works)
  utils.run_vtctl(['RunHealthCheck', shard_dest_rdonly.tablet_alias])
  # Diff against each source shard separately, identified by source_uid.
  logging.debug('Running vtworker SplitDiff on first half')
  utils.run_vtworker(['-cell', 'test_nj',
                      '--use_v3_resharding_mode=false',
                      'SplitDiff',
                      '--exclude_tables', 'unrelated',
                      '--min_healthy_rdonly_tablets', '1',
                      '--source_uid', '0',
                      'test_keyspace/-80'],
                     auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  logging.debug('Running vtworker SplitDiff on second half')
  utils.run_vtworker(['-cell', 'test_nj',
                      '--use_v3_resharding_mode=false',
                      'SplitDiff',
                      '--exclude_tables', 'unrelated',
                      '--min_healthy_rdonly_tablets', '1',
                      '--source_uid', '1',
                      'test_keyspace/-80'],
                     auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'],
      auto_log=True)

  # get status for the destination master tablet, make sure we have it all
  self.check_running_binlog_player(shard_dest_master, 3000, 1000)

  # check destination master query service is not running
  utils.check_tablet_query_service(self, shard_dest_master, False, False)
  stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                        '-count', '1',
                                        shard_dest_master.tablet_alias])
  logging.debug('Got health: %s', str(stream_health))
  self.assertIn('realtime_stats', stream_health)
  self.assertNotIn('serving', stream_health)

  # check the destination master 3 is healthy, even though its query
  # service is not running (if not healthy this would exception out)
  shard_dest_master.get_healthz()

  # now serve rdonly from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'],
                  auto_log=True)
  utils.check_srv_keyspace(
      'test_nj', 'test_keyspace',
      'Partitions(master): -40 40-80 80-\n'
      'Partitions(rdonly): -80 80-\n'
      'Partitions(replica): -40 40-80 80-\n',
      keyspace_id_type=base_sharding.keyspace_id_type,
      sharding_column_name='custom_ksid_col')

  # now serve replica from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'],
                  auto_log=True)
  utils.check_srv_keyspace(
      'test_nj', 'test_keyspace',
      'Partitions(master): -40 40-80 80-\n'
      'Partitions(rdonly): -80 80-\n'
      'Partitions(replica): -80 80-\n',
      keyspace_id_type=base_sharding.keyspace_id_type,
      sharding_column_name='custom_ksid_col')

  # now serve master from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'],
                  auto_log=True)
  utils.check_srv_keyspace(
      'test_nj', 'test_keyspace',
      'Partitions(master): -80 80-\n'
      'Partitions(rdonly): -80 80-\n'
      'Partitions(replica): -80 80-\n',
      keyspace_id_type=base_sharding.keyspace_id_type,
      sharding_column_name='custom_ksid_col')
  # Source masters must have stopped serving queries after the master
  # migration.
  utils.check_tablet_query_service(self, shard_0_master, False, True)
  utils.check_tablet_query_service(self, shard_1_master, False, True)

  # check the binlog players are gone now
  self.check_no_binlog_player(shard_dest_master)

  # kill the original tablets in the original shards
  tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                       shard_1_master, shard_1_replica, shard_1_rdonly])
  for t in [shard_0_replica, shard_0_rdonly,
            shard_1_replica, shard_1_rdonly]:
    utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
  for t in [shard_0_master, shard_1_master]:
    utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                    auto_log=True)

  # delete the original shards
  utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True)
  utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True)

  # rebuild the serving graph, all mentions of the old shards should be gone
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)

  # kill everything else
  tablet.kill_tablets([shard_2_master, shard_2_replica, shard_2_rdonly,
                       shard_dest_master, shard_dest_replica,
                       shard_dest_rdonly])
def test_resharding(self):
  """End-to-end split of shard 80- into 80-c0 and c0-.

  Workflow: create keyspace with a bad sharding column and fix it via
  SetKeyspaceShardingInfo, bring up source shards -80 and 80-, seed
  data, create the split shards, clone with vtworker SplitClone,
  verify filtered replication, run SplitDiff, exercise a replica
  failover and a PlannedReparentShard of a destination shard while
  replication is flowing, then migrate rdonly (per-cell, then
  everywhere), replica (forward, backward, forward again) and master
  serving types, and finally delete the source shard.
  """
  # we're going to reparent and swap these two
  global shard_2_master, shard_2_replica1

  utils.run_vtctl(['CreateKeyspace',
                   '--sharding_column_name', 'bad_column',
                   '--sharding_column_type', 'bytes',
                   '--split_shard_count', '2',
                   'test_keyspace'])
  # Changing sharding info without -force must fail on a keyspace that
  # already has it set.
  utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                   'custom_sharding_key', 'uint64'], expect_fail=True)
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force',
                   '-split_shard_count', '4',
                   'test_keyspace', 'custom_sharding_key',
                   keyspace_id_type])

  shard_0_master.init_tablet('master', 'test_keyspace', '-80')
  shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
  shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
  shard_1_master.init_tablet('master', 'test_keyspace', '80-')
  shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
  shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-')
  shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
  shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)
  ks = utils.run_vtctl_json(
      ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
  self.assertEqual(ks['split_shard_count'], 4)

  # we set full_mycnf_args to True as a test in the KIT_BYTES case
  full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

  # create databases so vttablet can start behaving normally
  for t in [shard_0_master, shard_0_replica, shard_0_ny_rdonly,
            shard_1_master, shard_1_slave1, shard_1_slave2,
            shard_1_ny_rdonly, shard_1_rdonly1]:
    t.create_db('vt_test_keyspace')
    t.start_vttablet(wait_for_state=None,
                     full_mycnf_args=full_mycnf_args)

  # wait for the tablets
  shard_0_master.wait_for_vttablet_state('SERVING')
  shard_0_replica.wait_for_vttablet_state('SERVING')
  shard_0_ny_rdonly.wait_for_vttablet_state('SERVING')
  shard_1_master.wait_for_vttablet_state('SERVING')
  shard_1_slave1.wait_for_vttablet_state('SERVING')
  shard_1_slave2.wait_for_vttablet_state('SERVING')
  shard_1_ny_rdonly.wait_for_vttablet_state('SERVING')
  shard_1_rdonly1.wait_for_vttablet_state('SERVING')

  # reparent to make the tablets work
  utils.run_vtctl(['InitShardMaster', 'test_keyspace/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  # create the tables
  self._create_schema()
  self._insert_startup_values()
  self._test_keyrange_constraints()

  # run a health check on source replicas so they respond to discovery
  # (for binlog players) and on the source rdonlys (for workers)
  for t in [shard_0_replica, shard_1_slave1]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'replica'])
  for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
    utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])

  # create the split shards
  shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
  shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0')
  shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0')
  shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
  shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-')
  shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')

  # start vttablet on the split shards (no db created,
  # so they're all not serving)
  # shard_3_master is started with health check enabled
  # (target_tablet_type='replica'); see the query-service check below.
  shard_3_master.start_vttablet(wait_for_state=None,
                                target_tablet_type='replica')
  for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
            shard_3_replica, shard_3_rdonly1]:
    t.start_vttablet(wait_for_state=None)
  for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
            shard_3_master, shard_3_replica, shard_3_rdonly1]:
    t.wait_for_vttablet_state('NOT_SERVING')

  utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-c0',
                   shard_2_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', 'test_keyspace/c0-',
                   shard_3_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')

  # the worker will do everything. We test with source_reader_count=10
  # (down from default=20) as connection pool is not big enough for 20.
  # min_table_size_for_split is set to 1 as to force a split even on the
  # small table we have.
  # we need to create the schema, and the worker will do data copying
  for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
    utils.run_vtctl(['CopySchemaShard',
                     '--exclude_tables', 'unrelated',
                     shard_1_rdonly1.tablet_alias,
                     keyspace_shard],
                    auto_log=True)

  utils.run_vtworker(['--cell', 'test_nj',
                      '--command_display_interval', '10ms',
                      'SplitClone',
                      '--exclude_tables', 'unrelated',
                      '--source_reader_count', '10',
                      '--min_table_size_for_split', '1',
                      'test_keyspace/80-'],
                     auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
      auto_log=True)

  # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

  # check the startup values are in the right place
  self._check_startup_values()

  # check the schema too
  utils.run_vtctl(['ValidateSchemaKeyspace',
                   '--exclude_tables=unrelated',
                   'test_keyspace'],
                  auto_log=True)

  # check the binlog players are running and exporting vars
  shard_2_master.wait_for_binlog_player_count(1)
  shard_3_master.wait_for_binlog_player_count(1)
  self._check_binlog_player_vars(shard_2_master)
  self._check_binlog_player_vars(shard_3_master)

  # check that binlog server exported the stats vars
  self._check_binlog_server_vars(shard_1_slave1)

  self._check_stream_health_equals_binlog_player_vars(shard_2_master)
  self._check_stream_health_equals_binlog_player_vars(shard_3_master)

  # testing filtered replication: insert a bunch of data on shard 1,
  # check we get most of it after a few seconds, wait for binlog server
  # timeout, check we get all of it.
  logging.debug('Inserting lots of data on source shard')
  self._insert_lots(1000)
  logging.debug('Checking 80 percent of data is sent quickly')
  v = self._check_lots_timeout(1000, 80, 5)
  if v != 100:
    # small optimization: only do this check if we don't have all the data
    # already anyway.
    logging.debug('Checking all data goes through eventually')
    self._check_lots_timeout(1000, 100, 20)
  logging.debug('Checking no data was sent the wrong way')
  self._check_lots_not_present(1000)
  self._check_binlog_player_vars(shard_2_master,
                                 seconds_behind_master_max=30)
  self._check_binlog_player_vars(shard_3_master,
                                 seconds_behind_master_max=30)

  # use vtworker to compare the data (after health-checking the destination
  # rdonly tablets so discovery works)
  utils.run_vtctl(
      ['RunHealthCheck', shard_3_rdonly1.tablet_alias, 'rdonly'])
  logging.debug('Running vtworker SplitDiff')
  utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                      '--exclude_tables', 'unrelated',
                      'test_keyspace/c0-'],
                     auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
      auto_log=True)

  utils.pause('Good time to test vtworker for diffs')

  # get status for a destination master tablet, make sure we have it all
  shard_2_master_status = shard_2_master.get_status()
  self.assertIn('Binlog player state: Running', shard_2_master_status)
  self.assertIn('<td><b>All</b>: 6000<br><b>Query</b>: 4000<br>'
                '<b>Transaction</b>: 2000<br></td>',
                shard_2_master_status)
  self.assertIn('</html>', shard_2_master_status)

  # start a thread to insert data into shard_1 in the background
  # with current time, and monitor the delay
  insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 10000,
                                 0x9000000000000000)
  insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 10001,
                                 0xD000000000000000)
  monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low')
  monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high')

  # tests a failover switching serving to a different replica
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
  shard_1_slave2.wait_for_vttablet_state('SERVING')
  shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
  utils.run_vtctl(
      ['RunHealthCheck', shard_1_slave2.tablet_alias, 'replica'])

  # test data goes through again
  logging.debug('Inserting lots of data on source shard')
  self._insert_lots(1000, base=1000)
  logging.debug('Checking 80 percent of data was sent quickly')
  self._check_lots_timeout(1000, 80, 5, base=1000)

  # check we can't migrate the master just yet
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                  expect_fail=True)

  # check query service is off on master 2 and master 3, as filtered
  # replication is enabled. Even health check that is enabled on
  # master 3 should not interfere (we run it to be sure).
  utils.run_vtctl(
      ['RunHealthCheck', shard_3_master.tablet_alias, 'replica'],
      auto_log=True)
  for master in [shard_2_master, shard_3_master]:
    utils.check_tablet_query_service(self, master, False, False)
    stream_health = utils.run_vtctl_json(
        ['VtTabletStreamHealth', '-count', '1', master.tablet_alias])
    logging.debug('Got health: %s', str(stream_health))
    self.assertIn('realtime_stats', stream_health)
    self.assertNotIn('serving', stream_health)

  # check the destination master 3 is healthy, even though its query
  # service is not running (if not healthy this would exception out)
  shard_3_master.get_healthz()

  # now serve rdonly from the split shards, in test_nj only
  utils.run_vtctl(['MigrateServedTypes', '--cells=test_nj',
                   'test_keyspace/80-', 'rdonly'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-c0 c0-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')
  # test_ny cell was not migrated, so it still serves rdonly from the
  # source shard.
  utils.check_srv_keyspace('test_ny', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')
  utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
  utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
  utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

  # now serve rdonly from the split shards, everywhere
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-c0 c0-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')
  utils.check_srv_keyspace('test_ny', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-c0 c0-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')
  utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
  utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
  utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

  # then serve replica from the split shards
  destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-c0 c0-\n'
                           'Partitions(replica): -80 80-c0 c0-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')
  utils.check_tablet_query_service(self, shard_1_slave2, False, True)

  # move replica back and forth
  utils.run_vtctl(
      ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
      auto_log=True)
  # After a backwards migration, queryservice should be enabled on
  # source and disabled on destinations
  utils.check_tablet_query_service(self, shard_1_slave2, True, False)
  # Destination tablets would have query service disabled for other
  # reasons than the migration, so check the shard record instead of
  # the tablets directly.
  utils.check_shard_query_services(self, destination_shards,
                                   topodata_pb2.REPLICA, False)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-c0 c0-\n'
                           'Partitions(replica): -80 80-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')

  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                  auto_log=True)
  # After a forwards migration, queryservice should be disabled on
  # source and enabled on destinations
  utils.check_tablet_query_service(self, shard_1_slave2, False, True)
  # Destination tablets would have query service disabled for other
  # reasons than the migration, so check the shard record instead of
  # the tablets directly
  utils.check_shard_query_services(self, destination_shards,
                                   topodata_pb2.REPLICA, True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-c0 c0-\n'
                           'Partitions(replica): -80 80-c0 c0-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')

  # reparent shard_2 to shard_2_replica1, then insert more data and
  # see it flow through still
  utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/80-c0',
                   shard_2_replica1.tablet_alias])

  # update our test variables to point at the new master
  shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master

  logging.debug(
      'Inserting lots of data on source shard after reparenting')
  self._insert_lots(3000, base=2000)
  logging.debug('Checking 80 percent of data was sent fairly quickly')
  self._check_lots_timeout(3000, 80, 10, base=2000)

  # use vtworker to compare the data again
  logging.debug('Running vtworker SplitDiff')
  utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                      '--exclude_tables', 'unrelated',
                      'test_keyspace/c0-'],
                     auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
      auto_log=True)
  utils.run_vtctl(
      ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
      auto_log=True)

  # going to migrate the master now, check the delays
  monitor_thread_1.done = True
  monitor_thread_2.done = True
  insert_thread_1.done = True
  insert_thread_2.done = True
  logging.debug('DELAY 1: %s max_lag=%d avg_lag=%d',
                monitor_thread_1.object_name,
                monitor_thread_1.max_lag,
                monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
  logging.debug('DELAY 2: %s max_lag=%d avg_lag=%d',
                monitor_thread_2.object_name,
                monitor_thread_2.max_lag,
                monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

  # mock with the SourceShard records to test 'vtctl SourceShardDelete'
  # and 'vtctl SourceShardAdd'
  utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                  auto_log=True)
  utils.run_vtctl(['SourceShardAdd', '--key_range=80-',
                   'test_keyspace/c0-', '0', 'test_keyspace/80-'],
                  auto_log=True)

  # then serve master from the split shards, make sure the source master's
  # query service is now turned off
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                  auto_log=True)
  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-c0 c0-\n'
                           'Partitions(rdonly): -80 80-c0 c0-\n'
                           'Partitions(replica): -80 80-c0 c0-\n',
                           keyspace_id_type=keyspace_id_type,
                           sharding_column_name='custom_sharding_key')
  utils.check_tablet_query_service(self, shard_1_master, False, True)

  # check the binlog players are gone now
  shard_2_master.wait_for_binlog_player_count(0)
  shard_3_master.wait_for_binlog_player_count(0)

  # get status for a destination master tablet, make sure it's good
  shard_2_master_status = shard_2_master.get_status()
  self.assertIn('No binlog player is running', shard_2_master_status)
  self.assertIn('</html>', shard_2_master_status)

  # delete the original tablets in the original shard
  tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
                       shard_1_ny_rdonly, shard_1_rdonly1])
  for t in [shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
            shard_1_rdonly1]:
    utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
  utils.run_vtctl(
      ['DeleteTablet', '-allow_master', shard_1_master.tablet_alias],
      auto_log=True)

  # rebuild the serving graph, all mentions of the old shards should be gone
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                  auto_log=True)

  # test RemoveShardCell
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                  auto_log=True, expect_fail=True)
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                  auto_log=True)
  utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                  auto_log=True)
  shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
  self.assertNotIn('cells', shard)

  # delete the original shard
  utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

  # kill everything
  tablet.kill_tablets([shard_0_master, shard_0_replica,
                       shard_0_ny_rdonly, shard_2_master,
                       shard_2_replica1, shard_2_replica2,
                       shard_3_master, shard_3_replica, shard_3_rdonly1])