Esempio n. 1
0
def tearDownModule():
  logging.debug("in tearDownModule")
  if utils.options.skip_teardown:
    return
  logging.debug("Tearing down the servers and setup")
  teardown_procs = [shard_0_master.teardown_mysql(),
                    shard_0_replica.teardown_mysql(),
                    shard_1_master.teardown_mysql(),
                    shard_1_replica.teardown_mysql(),
                   ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  utils.zk_teardown()
  shard_0_master.kill_vttablet()
  shard_0_replica.kill_vttablet()
  shard_1_master.kill_vttablet()
  shard_1_replica.kill_vttablet()

  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
Esempio n. 2
0
def tearDownModule():
  global vtgate_server
  logging.debug("in tearDownModule")
  if utils.options.skip_teardown:
    return
  logging.debug("Tearing down the servers and setup")
  utils.vtgate_kill(vtgate_server)
  tablet.kill_tablets([shard_0_master, shard_0_replica, shard_1_master,
                       shard_1_replica])
  teardown_procs = [shard_0_master.teardown_mysql(),
                    shard_0_replica.teardown_mysql(),
                    shard_1_master.teardown_mysql(),
                    shard_1_replica.teardown_mysql(),
                   ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server_teardown()

  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
Esempio n. 3
0
def setUpModule():
    try:
        environment.topo_server_setup()

        setup_procs = [
            shard_0_master.init_mysql(),
            shard_0_replica.init_mysql(),
            shard_0_ny_slave.init_mysql(),
            shard_1_master.init_mysql(),
            shard_1_slave1.init_mysql(),
            shard_1_slave2.init_mysql(),
            shard_1_ny_slave.init_mysql(),
            shard_1_rdonly.init_mysql(),
            shard_2_master.init_mysql(),
            shard_2_replica1.init_mysql(),
            shard_2_replica2.init_mysql(),
            shard_3_master.init_mysql(),
            shard_3_replica.init_mysql(),
            shard_3_rdonly.init_mysql(),
        ]
        utils.Vtctld().start()
        utils.wait_procs(setup_procs)
    except:
        tearDownModule()
        raise
Esempio n. 4
0
    def test_no_mysql_healthcheck(self):
        """This test starts a vttablet with no mysql port, while mysql is down.
    It makes sure vttablet will start properly and be unhealthy.
    Then we start mysql, and make sure vttablet becomes healthy.
    """
        # we need replication to be enabled, so the slave tablet can be healthy.
        for t in tablet_62344, tablet_62044:
            t.create_db("vt_test_keyspace")
        pos = mysql_flavor().master_position(tablet_62344)
        changeMasterCmds = mysql_flavor().change_master_commands(utils.hostname, tablet_62344.mysql_port, pos)
        tablet_62044.mquery("", ["RESET MASTER", "RESET SLAVE"] + changeMasterCmds + ["START SLAVE"])

        # now shutdown all mysqld
        shutdown_procs = [tablet_62344.shutdown_mysql(), tablet_62044.shutdown_mysql()]
        utils.wait_procs(shutdown_procs)

        # start the tablets, wait for them to be NOT_SERVING (mysqld not there)
        tablet_62344.init_tablet("master", "test_keyspace", "0")
        tablet_62044.init_tablet("spare", "test_keyspace", "0", include_mysql_port=False)
        for t in tablet_62344, tablet_62044:
            t.start_vttablet(
                wait_for_state=None, target_tablet_type="replica", full_mycnf_args=True, include_mysql_port=False
            )
        for t in tablet_62344, tablet_62044:
            t.wait_for_vttablet_state("NOT_SERVING")
            self.check_healthz(t, False)

        # restart mysqld
        start_procs = [tablet_62344.start_mysql(), tablet_62044.start_mysql()]
        utils.wait_procs(start_procs)

        # the master should still be healthy
        utils.run_vtctl(["RunHealthCheck", tablet_62344.tablet_alias, "replica"], auto_log=True)
        self.check_healthz(tablet_62344, True)

        # the slave won't be healthy at first, as replication is not running
        utils.run_vtctl(["RunHealthCheck", tablet_62044.tablet_alias, "replica"], auto_log=True)
        self.check_healthz(tablet_62044, False)
        tablet_62044.wait_for_vttablet_state("NOT_SERVING")

        # restart replication
        tablet_62044.mquery("", ["START SLAVE"])

        # wait for the tablet to become healthy and fix its mysql port
        utils.run_vtctl(["RunHealthCheck", tablet_62044.tablet_alias, "replica"], auto_log=True)
        tablet_62044.wait_for_vttablet_state("SERVING")
        self.check_healthz(tablet_62044, True)

        for t in tablet_62344, tablet_62044:
            # wait for mysql port to show up
            timeout = 10
            while True:
                ti = utils.run_vtctl_json(["GetTablet", t.tablet_alias])
                if "mysql" in ti["Portmap"]:
                    break
                timeout = utils.wait_step("mysql port in tablet record", timeout)
            self.assertEqual(ti["Portmap"]["mysql"], t.mysql_port)

        # all done
        tablet.kill_tablets([tablet_62344, tablet_62044])
Esempio n. 5
0
def tearDownModule():
  utils.required_teardown()
  if utils.options.skip_teardown:
    return

  if use_mysqlctld:
    # Try to terminate mysqlctld gracefully, so it kills its mysqld.
    for proc in setup_procs:
      utils.kill_sub_process(proc, soft=True)
    teardown_procs = setup_procs
  else:
    teardown_procs = [
        tablet_master.teardown_mysql(),
        tablet_replica1.teardown_mysql(),
        tablet_replica2.teardown_mysql(),
    ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_master.remove_tree()
  tablet_replica1.remove_tree()
  tablet_replica2.remove_tree()
Esempio n. 6
0
def tearDownModule():
  if utils.options.skip_teardown:
    return

  tablet.kill_tablets([src_master, src_replica, src_rdonly1, src_rdonly2,
                       dst_master, dst_replica])

  teardown_procs = [
      src_master.teardown_mysql(),
      src_replica.teardown_mysql(),
      src_rdonly1.teardown_mysql(),
      src_rdonly2.teardown_mysql(),
      dst_master.teardown_mysql(),
      dst_replica.teardown_mysql(),
      ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  src_master.remove_tree()
  src_replica.remove_tree()
  src_rdonly1.remove_tree()
  src_rdonly2.remove_tree()
  dst_master.remove_tree()
  dst_replica.remove_tree()
Esempio n. 7
0
def setUpModule():
  try:
    environment.topo_server().setup()

    setup_procs = [
        shard_master.init_mysql(),
        shard_replica.init_mysql(),
        shard_rdonly1.init_mysql(),
        shard_0_master.init_mysql(),
        shard_0_replica.init_mysql(),
        shard_0_rdonly1.init_mysql(),
        shard_1_master.init_mysql(),
        shard_1_replica.init_mysql(),
        shard_1_rdonly1.init_mysql(),
    ]
    utils.wait_procs(setup_procs)
    init_keyspace()
    logging.debug('environment set up with the following shards and tablets:')
    logging.debug('=========================================================')
    logging.debug('TABLETS: test_keyspace/0:\n%s', all_shard_tablets)
    logging.debug('TABLETS: test_keyspace/-80:\n%s', shard_0_tablets)
    logging.debug('TABLETS: test_keyspace/80-:\n%s', shard_1_tablets)
  except:
    tearDownModule()
    raise
 def set_up(self):
   try:
     environment.topo_server_setup()
     utils.wait_procs([t.init_mysql() for t in self.tablets])
     utils.run_vtctl(['CreateKeyspace', self.keyspace])
     utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', self.keyspace, 'keyspace_id', 'uint64'])
     for t in self.tablets:
       t.init_tablet(t.type, keyspace=self.keyspace, shard=t.shard)
     utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
     for t in self.tablets:
       t.create_db('vt_' + self.keyspace)
       t.mquery(t.dbname, create_table)
       t.start_vttablet(wait_for_state=None)
     for t in self.tablets:
       t.wait_for_vttablet_state('SERVING')
     for t in self.tablets:
       if t.type == "master":
         utils.run_vtctl(['ReparentShard', '-force', self.keyspace+'/'+t.shard, t.tablet_alias], auto_log=True)
     utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
     self.vtgate_server, self.vtgate_port = utils.vtgate_start()
     vtgate_client = zkocc.ZkOccConnection("localhost:%u" % self.vtgate_port, "test_nj", 30.0)
     topology.read_topology(vtgate_client)
   except:
     self.shutdown()
     raise
Esempio n. 9
0
  def setUp(self):
    utils.wait_procs([self.tablet.init_mysql()])
    self.tablet.mquery(
        '', ['create database vt_test_keyspace', 'set global read_only = off'])

    self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace')
    with open(
        os.path.join(self.vttop, 'test', 'test_data', 'test_schema.sql')) as f:
      self.clean_sqls = []
      self.init_sqls = []
      clean_mode = False
      for line in f:
        line = line.rstrip()
        if line == '# clean':
          clean_mode = True
        if line=='' or line.startswith('#'):
          continue
        if clean_mode:
          self.clean_sqls.append(line)
        else:
          self.init_sqls.append(line)
      try:
        for line in self.init_sqls:
          mcu.execute(line, {})
      finally:
        mcu.close()

    customrules = os.path.join(environment.tmproot, 'customrules.json')
    schema_override = os.path.join(environment.tmproot, 'schema_override.json')
    self.create_schema_override(schema_override)
    table_acl_config = os.path.join(
        environment.vttop, 'test', 'test_data', 'table_acl_config.json')

    environment.topo_server().setup()
    self.create_customrules(customrules);
    utils.run_vtctl('CreateKeyspace -force test_keyspace')
    self.tablet.init_tablet('master', 'test_keyspace', '0')
    if environment.topo_server().flavor() == 'zookeeper':
      self.tablet.start_vttablet(
          memcache=self.memcache,
          zkcustomrules='/zk/test_ca/config/customrules/testrules',
          schema_override=schema_override,
          table_acl_config=table_acl_config,
      )
    else:
      self.tablet.start_vttablet(
          memcache=self.memcache,
          filecustomrules=customrules,
          schema_override=schema_override,
          table_acl_config=table_acl_config,
      )
    self.conn = self.connect()
    self.txlogger = utils.curl(
        self.url('/debug/txlog'), background=True,
        stdout=open(self.txlog_file, 'w'))
    self.txlog = framework.Tailer(self.txlog_file, flush=self.tablet.flush)
    self.log = framework.Tailer(
        os.path.join(environment.vtlogroot, 'vttablet.INFO'),
        flush=self.tablet.flush)
    self.querylog = Querylog(self)
Esempio n. 10
0
def tearDownModule():
  utils.required_teardown()
  if utils.options.skip_teardown:
    return

  if utils.vtgate:
    utils.vtgate.kill()
  teardown_procs = [
      source_master.teardown_mysql(),
      source_replica.teardown_mysql(),
      source_rdonly1.teardown_mysql(),
      source_rdonly2.teardown_mysql(),
      destination_master.teardown_mysql(),
      destination_replica.teardown_mysql(),
      destination_rdonly1.teardown_mysql(),
      destination_rdonly2.teardown_mysql(),
  ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  source_master.remove_tree()
  source_replica.remove_tree()
  source_rdonly1.remove_tree()
  source_rdonly2.remove_tree()
  destination_master.remove_tree()
  destination_replica.remove_tree()
  destination_rdonly1.remove_tree()
  destination_rdonly2.remove_tree()
Esempio n. 11
0
def tearDownModule():
  if utils.options.skip_teardown:
    return

  tablet.kill_tablets([shard_0_master, shard_0_replica,
                       shard_1_master, shard_1_replica])
  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica.teardown_mysql(),
      unsharded_master.teardown_mysql(),
      unsharded_replica.teardown_mysql(),
      ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
  unsharded_master.remove_tree()
  unsharded_replica.remove_tree()
Esempio n. 12
0
def setup():
    utils.zk_setup()

    # start mysql instance external to the test
    setup_procs = [master_tablet.init_mysql(), replica_tablet.init_mysql()]
    utils.wait_procs(setup_procs)
    setup_tablets()
Esempio n. 13
0
 def teardown(self):
   all_tablets = self.tablet_map.values()
   tablet.kill_tablets(all_tablets)
   teardown_procs = [t.teardown_mysql() for t in all_tablets]
   utils.wait_procs(teardown_procs, raise_on_error=False)
   for t in all_tablets:
     t.remove_tree()
Esempio n. 14
0
def tearDownModule():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica1.teardown_mysql(),
      shard_0_replica2.teardown_mysql(),
      shard_0_rdonly.teardown_mysql(),
      shard_0_backup.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica1.teardown_mysql(),
      shard_2_master.teardown_mysql(),
      shard_2_replica1.teardown_mysql(),
      ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica1.remove_tree()
  shard_0_replica2.remove_tree()
  shard_0_rdonly.remove_tree()
  shard_0_backup.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica1.remove_tree()
  shard_2_master.remove_tree()
  shard_2_replica1.remove_tree()
Esempio n. 15
0
def tearDownModule():
  if utils.options.skip_teardown:
    return

  if use_mysqlctld:
    # Try to terminate mysqlctld gracefully, so it kills its mysqld.
    for proc in setup_procs:
      utils.kill_sub_process(proc, soft=True)
    teardown_procs = setup_procs
  else:
    teardown_procs = [
        tablet_62344.teardown_mysql(),
        tablet_31981.teardown_mysql(),
        ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_62344.remove_tree()
  tablet_31981.remove_tree()

  path = os.path.join(environment.vtdataroot, 'snapshot')
  try:
    shutil.rmtree(path)
  except OSError as e:
    logging.debug("removing snapshot %s: %s", path, str(e))
Esempio n. 16
0
 def set_up(self):
   try:
     environment.topo_server().setup()
     utils.wait_procs([t.init_mysql() for t in self.tablets])
     utils.run_vtctl(['CreateKeyspace', self.keyspace])
     utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', self.keyspace, 'keyspace_id', 'uint64'])
     for t in self.tablets:
       t.init_tablet(t.type, keyspace=self.keyspace, shard=t.shard)
     utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
     for t in self.tablets:
       t.create_db('vt_' + self.keyspace)
       t.start_vttablet(
         wait_for_state=None,
         extra_args=['-queryserver-config-schema-reload-time', '1'],
       )
     for t in self.tablets:
       t.wait_for_vttablet_state('SERVING')
     for t in self.tablets:
       if t.type == "master":
         utils.run_vtctl(['ReparentShard', '-force', self.keyspace+'/'+t.shard, t.tablet_alias], auto_log=True)
     utils.run_vtctl(['RebuildKeyspaceGraph', self.keyspace], auto_log=True)
     if self.schema:
       utils.run_vtctl(['ApplySchemaKeyspace', '-simple', '-sql', self.schema, self.keyspace])
     if self.vschema:
       if self.vschema[0] == '{':
         utils.run_vtctl(['ApplyVSchema', "-vschema", self.vschema])
       else:
         utils.run_vtctl(['ApplyVSchema', "-vschema_file", self.vschema])
     self.vtgate_server, self.vtgate_port = utils.vtgate_start(cache_ttl='500s', vtport=self.vtgate_port)
     vtgate_client = zkocc.ZkOccConnection("localhost:%u" % self.vtgate_port, "test_nj", 30.0)
     topology.read_topology(vtgate_client)
   except:
     self.shutdown()
     raise
Esempio n. 17
0
def teardown():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      tablet_62344.teardown_mysql(),
      tablet_62044.teardown_mysql(),
      tablet_41983.teardown_mysql(),
      tablet_31981.teardown_mysql(),
      ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  utils.zk_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_62344.remove_tree()
  tablet_62044.remove_tree()
  tablet_41983.remove_tree()
  tablet_31981.remove_tree()

  path = os.path.join(utils.vtdataroot, 'snapshot')
  try:
    shutil.rmtree(path)
  except OSError as e:
    if utils.options.verbose:
      print >> sys.stderr, e, path
Esempio n. 18
0
def teardown():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica.teardown_mysql(),
      shard_2_master.teardown_mysql(),
      shard_2_replica.teardown_mysql(),
      shard_3_master.teardown_mysql(),
      shard_3_replica.teardown_mysql(),
      ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  utils.zk_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
  shard_2_master.remove_tree()
  shard_2_replica.remove_tree()
  shard_3_master.remove_tree()
  shard_3_replica.remove_tree()
Esempio n. 19
0
def tearDownModule():
  if utils.options.skip_teardown:
    return

  teardown_procs = [
      tablet_62344.teardown_mysql(),
      tablet_62044.teardown_mysql(),
      tablet_41983.teardown_mysql(),
      tablet_31981.teardown_mysql(),
      ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  utils.zk_teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  tablet_62344.remove_tree()
  tablet_62044.remove_tree()
  tablet_41983.remove_tree()
  tablet_31981.remove_tree()

  path = os.path.join(utils.vtdataroot, 'snapshot')
  try:
    shutil.rmtree(path)
  except OSError as e:
    logging.debug("removing snapshot %s: %s", path, str(e))
Esempio n. 20
0
  def launch(
      self, keyspace, shards=None, replica_count=1, rdonly_count=0, ddls=None):
    """Launch test environment."""

    if replica_count < 1:
      raise Exception('replica_count=%d < 1; tests now use semi-sync'
                      ' and must have at least one replica' % replica_count)
    self.tablets = []
    self.master_tablets = []
    utils.run_vtctl(['CreateKeyspace', keyspace])
    if not shards or shards[0] == '0':
      shards = ['0']

    # Create tablets and start mysqld.
    procs = []
    for shard in shards:
      procs.append(self._new_tablet(keyspace, shard, 'master', None))
      for i in xrange(replica_count):
        procs.append(self._new_tablet(keyspace, shard, 'replica', i))
      for i in xrange(rdonly_count):
        procs.append(self._new_tablet(keyspace, shard, 'rdonly', i))
    utils.wait_procs(procs)

    # init tablets.
    for shard in shards:
      tablet_index = 0
      self._init_tablet(keyspace, shard, 'master', None, tablet_index)
      tablet_index += 1
      for i in xrange(replica_count):
        self._init_tablet(keyspace, shard, 'replica', i, tablet_index)
        tablet_index += 1
      for i in xrange(rdonly_count):
        self._init_tablet(keyspace, shard, 'rdonly', i, tablet_index)
        tablet_index += 1

    # Start tablets.
    for shard in shards:
      self._start_tablet(keyspace, shard, 'master', None)
      for i in xrange(replica_count):
        self._start_tablet(keyspace, shard, 'replica', i)
      for i in xrange(rdonly_count):
        self._start_tablet(keyspace, shard, 'rdonly', i)

    for t in self.tablets:
      t.wait_for_vttablet_state('NOT_SERVING')

    for t in self.master_tablets:
      utils.run_vtctl(['InitShardMaster', '-force', keyspace+'/'+t.shard,
                       t.tablet_alias], auto_log=True)
      t.tablet_type = 'master'

    for t in self.tablets:
      t.wait_for_vttablet_state('SERVING')

    for ddl in ddls:
      fname = os.path.join(environment.tmproot, 'ddl.sql')
      with open(fname, 'w') as f:
        f.write(ddl)
      utils.run_vtctl(['ApplySchema', '-sql-file', fname, keyspace])
Esempio n. 21
0
  def setUp(self):
    environment.topo_server_setup()

    utils.wait_procs([self.tablet.init_mysql()])
    self.tablet.mquery("", ["create database vt_test_keyspace", "set global read_only = off"])

    self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace')
    self.clean_sqls = []
    self.init_sqls = []
    clean_mode = False
    with open(os.path.join(self.vttop, "test", "test_data", "test_schema.sql")) as f:
      for line in f:
        line = line.rstrip()
        if line == "# clean":
          clean_mode = True
        if line=='' or line.startswith("#"):
          continue
        if clean_mode:
          self.clean_sqls.append(line)
        else:
          self.init_sqls.append(line)
    try:
      for line in self.init_sqls:
        mcu.execute(line, {})
    finally:
      mcu.close()

    utils.run_vtctl('CreateKeyspace -force test_keyspace')
    self.tablet.init_tablet('master', 'test_keyspace', '0')

    customrules = os.path.join(environment.tmproot, 'customrules.json')
    self.create_customrules(customrules)
    schema_override = os.path.join(environment.tmproot, 'schema_override.json')
    self.create_schema_override(schema_override)
    table_acl_config = os.path.join(environment.vttop, 'test', 'test_data', 'table_acl_config.json')
    self.tablet.start_vttablet(
            memcache=self.memcache,
            customrules=customrules,
            schema_override=schema_override,
            table_acl_config=table_acl_config,
            auth=True,
    )

    # FIXME(szopa): This is necessary here only because of a bug that
    # makes the qs reload its config only after an action.
    utils.run_vtctl('Ping ' + self.tablet.tablet_alias)

    for i in range(30):
      try:
        self.conn = self.connect()
        self.txlogger = utils.curl(self.url('/debug/txlog'), background=True, stdout=open(self.txlog_file, 'w'))
        self.txlog = framework.Tailer(open(self.txlog_file), flush=self.tablet.flush)
        self.log = framework.Tailer(open(os.path.join(environment.vtlogroot, 'vttablet.INFO')), flush=self.tablet.flush)
        break
      except dbexceptions.OperationalError:
        if i == 29:
          raise
        time.sleep(1)
    self.postSetup()
Esempio n. 22
0
 def setUp(self):
   """Shuts down MySQL on the destination masters (in addition to the base setup)"""
   logging.debug("Starting base setup for MysqlDownDuringWorkerCopy")
   super(TestMysqlDownDuringWorkerCopy, self).setUp()
   logging.debug("Starting MysqlDownDuringWorkerCopy-specific setup")
   utils.wait_procs([shard_0_master.shutdown_mysql(),
     shard_1_master.shutdown_mysql()])
   logging.debug("Finished MysqlDownDuringWorkerCopy-specific setup")
Esempio n. 23
0
 def tearDown(self):
   """Restarts the MySQL processes that were killed during the setup."""
   logging.debug("Starting MysqlDownDuringWorkerCopy-specific tearDown")
   utils.wait_procs([shard_0_master.start_mysql(),
     shard_1_master.start_mysql()])
   logging.debug("Finished MysqlDownDuringWorkerCopy-specific tearDown")
   super(TestMysqlDownDuringWorkerCopy, self).tearDown()
   logging.debug("Finished base tearDown for MysqlDownDuringWorkerCopy")
Esempio n. 24
0
def tearDownModule():
  if utils.options.skip_teardown:
    return

  utils.wait_procs([t.teardown_mysql() for t in tablets], raise_on_error=False)
  utils.kill_sub_processes()
  for t in tablets:
    t.remove_tree()
Esempio n. 25
0
def setUpModule():
  try:
    environment.topo_server().setup()
    setup_procs = [t.init_mysql() for t in all_tablets]
    utils.wait_procs(setup_procs)
  except:
    tearDownModule()
    raise
Esempio n. 26
0
  def test_no_mysql_healthcheck(self):
    """This test starts a vttablet with no mysql port, while mysql is down.
    It makes sure vttablet will start properly and be unhealthy.
    Then we start mysql, and make sure vttablet becomes healthy.
    """
    # we need replication to be enabled, so the slave tablet can be healthy.
    for t in tablet_62344, tablet_62044:
      t.create_db('vt_test_keyspace')
    pos = mysql_flavor().master_position(tablet_62344)
    changeMasterCmds = mysql_flavor().change_master_commands(
                            utils.hostname,
                            tablet_62344.mysql_port,
                            pos)
    tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                        changeMasterCmds +
                        ['START SLAVE'])

    # now shutdown all mysqld
    shutdown_procs = [
        tablet_62344.shutdown_mysql(),
        tablet_62044.shutdown_mysql(),
        ]
    utils.wait_procs(shutdown_procs)

    # start the tablets, wait for them to be NOT_SERVING (mysqld not there)
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('spare', 'test_keyspace', '0',
                             include_mysql_port=False)
    for t in tablet_62344, tablet_62044:
      t.start_vttablet(wait_for_state=None,
                       target_tablet_type='replica',
                       full_mycnf_args=True, include_mysql_port=False)
    for t in tablet_62344, tablet_62044:
      t.wait_for_vttablet_state('NOT_SERVING')

    # restart mysqld
    start_procs = [
        tablet_62344.start_mysql(),
        tablet_62044.start_mysql(),
        ]
    utils.wait_procs(start_procs)

    # wait for the tablets to become healthy and fix their mysql port
    for t in tablet_62344, tablet_62044:
      t.wait_for_vttablet_state('SERVING')
    for t in tablet_62344, tablet_62044:
      # wait for mysql port to show up
      timeout = 10
      while True:
        ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
        if 'mysql' in ti['Portmap']:
          break
        timeout = utils.wait_step('mysql port in tablet record', timeout)
      self.assertEqual(ti['Portmap']['mysql'], t.mysql_port)

    # all done
    tablet.kill_tablets([tablet_62344, tablet_62044])
Esempio n. 27
0
 def shutdown(self):
   tablet.kill_tablets(self.tablets)
   teardown_procs = [t.teardown_mysql() for t in self.tablets]
   utils.wait_procs(teardown_procs, raise_on_error=False)
   environment.topo_server().teardown()
   utils.kill_sub_processes()
   utils.remove_tmp_files()
   for t in self.tablets:
     t.remove_tree()
Esempio n. 28
0
 def _reset_tablet_dir(self, t):
   """Stop mysql, delete everything including tablet dir, restart mysql."""
   utils.wait_procs([t.teardown_mysql()])
   t.remove_tree()
   proc = t.init_mysql()
   if use_mysqlctld:
     t.wait_for_mysqlctl_socket()
   else:
     utils.wait_procs([proc])
Esempio n. 29
0
def setUpModule():
  global vtgate_server
  global vtgate_port
  global vtgate_socket_file
  global master_start_position

  try:
    environment.topo_server_setup()

    # start mysql instance external to the test
    setup_procs = [master_tablet.init_mysql(),
                   replica_tablet.init_mysql()
                   ]
    utils.wait_procs(setup_procs)

    # Start up a master mysql and vttablet
    logging.debug("Setting up tablets")
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    master_tablet.init_tablet('master', 'test_keyspace', '0')
    replica_tablet.init_tablet('replica', 'test_keyspace', '0')
    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
    utils.validate_topology()
    master_tablet.create_db('vt_test_keyspace')
    master_tablet.create_db('other_database')
    replica_tablet.create_db('vt_test_keyspace')
    replica_tablet.create_db('other_database')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])

    vtgate_socket_file = environment.tmproot + '/vtgate.sock'
    vtgate_server, vtgate_port = utils.vtgate_start(socket_file=vtgate_socket_file)

    master_tablet.start_vttablet()
    replica_tablet.start_vttablet()
    utils.run_vtctl(['SetReadWrite', master_tablet.tablet_alias])
    utils.check_db_read_write(master_tablet.tablet_uid)

    for t in [master_tablet, replica_tablet]:
      t.reset_replication()
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/0',
                     master_tablet.tablet_alias], auto_log=True)

    # reset counter so tests don't assert
    tablet.Tablet.tablets_running = 0

    master_start_position = _get_master_current_position()
    master_tablet.mquery('vt_test_keyspace', _create_vt_insert_test)
    master_tablet.mquery('vt_test_keyspace', _create_vt_a)
    master_tablet.mquery('vt_test_keyspace', _create_vt_b)

    utils.run_vtctl(['ReloadSchema', master_tablet.tablet_alias])
    utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias])

  except:
    tearDownModule()
    raise
Esempio n. 30
0
  def setUp(self):
    utils.wait_procs([self.tablet.init_mysql()])
    self.tablet.mquery("", ["create database vt_test_keyspace", "set global read_only = off"])

    self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace')
    with open(os.path.join(self.vttop, "test", "test_data", "test_schema.sql")) as f:
      self.clean_sqls = []
      self.init_sqls = []
      clean_mode = False
      for line in f:
        line = line.rstrip()
        if line == "# clean":
          clean_mode = True
        if line=='' or line.startswith("#"):
          continue
        if clean_mode:
          self.clean_sqls.append(line)
        else:
          self.init_sqls.append(line)
      try:
        for line in self.init_sqls:
          mcu.execute(line, {})
      finally:
        mcu.close()

    customrules = os.path.join(environment.tmproot, 'customrules.json')
    self.create_customrules(customrules)
    schema_override = os.path.join(environment.tmproot, 'schema_override.json')
    self.create_schema_override(schema_override)
    table_acl_config = os.path.join(environment.vttop, 'test', 'test_data', 'table_acl_config.json')

    if self.env == 'vttablet':
      environment.topo_server().setup()
      utils.run_vtctl('CreateKeyspace -force test_keyspace')
      self.tablet.init_tablet('master', 'test_keyspace', '0')
      self.tablet.start_vttablet(
              memcache=self.memcache,
              customrules=customrules,
              schema_override=schema_override,
              table_acl_config=table_acl_config,
              auth=True,
      )
    else:
      self.tablet.start_vtocc(
              memcache=self.memcache,
              customrules=customrules,
              schema_override=schema_override,
              table_acl_config=table_acl_config,
              auth=True,
              keyspace="test_keyspace", shard="0",
      )
    self.conn = self.connect()
    self.txlogger = utils.curl(self.url('/debug/txlog'), background=True, stdout=open(self.txlog_file, 'w'))
    self.txlog = framework.Tailer(self.txlog_file, flush=self.tablet.flush)
    self.log = framework.Tailer(os.path.join(environment.vtlogroot, '%s.INFO' % self.env), flush=self.tablet.flush)
    self.querylog = Querylog(self)
Esempio n. 31
0
def tearDownModule():
    global vtgate_server
    global __tablets
    logging.debug("in tearDownModule")
    if utils.options.skip_teardown:
        return
    logging.debug("Tearing down the servers and setup")
    utils.vtgate_kill(vtgate_server)
    if __tablets is not None:
        tablet.kill_tablets(__tablets)
        teardown_procs = []
        for t in __tablets:
            teardown_procs.append(t.teardown_mysql())
        utils.wait_procs(teardown_procs, raise_on_error=False)

    environment.topo_server().teardown()

    utils.kill_sub_processes()
    utils.remove_tmp_files()

    if __tablets is not None:
        for t in __tablets:
            t.remove_tree()
Esempio n. 32
0
def setUpModule():
  try:
    environment.topo_server().setup()

    # setup all processes
    setup_procs = [
        shard_0_master.init_mysql(),
        shard_0_slave.init_mysql(),
        ]
    utils.wait_procs(setup_procs)

    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    shard_0_master.init_tablet('replica', 'test_keyspace', '0')
    shard_0_slave.init_tablet('replica', 'test_keyspace', '0')

    # create databases so vttablet can start behaving normally
    shard_0_master.create_db('vt_test_keyspace')
    shard_0_slave.create_db('vt_test_keyspace')

  except:
    tearDownModule()
    raise
Esempio n. 33
0
def setUpModule():
  try:
    environment.topo_server().setup()
    utils.Vtctld().start()

    setup_procs = [
        master_tablet.init_mysql(),
        replica_tablet.init_mysql(),
        ]
    utils.wait_procs(setup_procs)

    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    master_tablet.init_tablet('master', 'test_keyspace', '0')
    replica_tablet.init_tablet('replica', 'test_keyspace', '0')
    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    master_tablet.create_db('vt_test_keyspace')
    replica_tablet.create_db('vt_test_keyspace')
  except:
    tearDownModule()
    raise
Esempio n. 34
0
def setUpModule():
    try:
        environment.topo_server().setup()

        # start mysql instance external to the test
        setup_procs = [master_tablet.init_mysql(), replica_tablet.init_mysql()]
        utils.wait_procs(setup_procs)

        # Start up a master mysql and vttablet
        logging.debug("Setting up tablets")
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
        master_tablet.init_tablet('master', 'test_keyspace', '0')
        replica_tablet.init_tablet('replica', 'test_keyspace', '0')
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
        utils.validate_topology()

        master_tablet.populate('vt_test_keyspace', create_vt_insert_test)
        replica_tablet.populate('vt_test_keyspace', create_vt_insert_test)

        utils.VtGate().start()

        master_tablet.start_vttablet(memcache=True, wait_for_state=None)
        replica_tablet.start_vttablet(memcache=True, wait_for_state=None)
        master_tablet.wait_for_vttablet_state('SERVING')
        replica_tablet.wait_for_vttablet_state('SERVING')

        utils.run_vtctl(
            ['InitShardMaster', 'test_keyspace/0', master_tablet.tablet_alias],
            auto_log=True)
        utils.validate_topology()

        # restart the replica tablet so the stats are reset
        replica_tablet.kill_vttablet()
        replica_tablet.start_vttablet(memcache=True)
    except:
        tearDownModule()
        raise
Esempio n. 35
0
def setUpModule():
    try:
        environment.topo_server_setup()

        setup_procs = [
            shard_0_master.init_mysql(),
            shard_0_replica.init_mysql(),
            shard_0_ny_slave.init_mysql(),
            shard_1_master.init_mysql(),
            shard_1_slave1.init_mysql(),
            shard_1_slave2.init_mysql(),
            shard_1_ny_slave.init_mysql(),
            shard_1_rdonly.init_mysql(),
            shard_2_master.init_mysql(),
            shard_2_replica1.init_mysql(),
            shard_2_replica2.init_mysql(),
            shard_3_master.init_mysql(),
            shard_3_replica.init_mysql(),
            shard_3_rdonly.init_mysql(),
        ]
        utils.wait_procs(setup_procs)
    except:
        tearDownModule()
        raise
Esempio n. 36
0
def tearDownModule():
    if utils.options.skip_teardown:
        return

    if use_mysqlctld:
        # Try to terminate mysqlctld gracefully, so it kills its mysqld.
        for proc in setup_procs:
            utils.kill_sub_process(proc, soft=True)
        teardown_procs = setup_procs
    else:
        teardown_procs = [
            tablet_master.teardown_mysql(),
            tablet_replica1.teardown_mysql(),
            tablet_replica2.teardown_mysql(),
        ]
    utils.wait_procs(teardown_procs, raise_on_error=False)

    environment.topo_server().teardown()
    utils.kill_sub_processes()
    utils.remove_tmp_files()

    tablet_master.remove_tree()
    tablet_replica1.remove_tree()
    tablet_replica2.remove_tree()
Esempio n. 37
0
def setUpModule():
  global vtgate_server, vtgate_port
  logging.debug("in setUpModule")
  try:
    environment.topo_server().setup()
    setup_topology()

    # start mysql instance external to the test
    global __tablets
    setup_procs = []
    for tablet in __tablets:
      setup_procs.append(tablet.init_mysql())
    utils.wait_procs(setup_procs)
    create_db()
    start_tablets()
    vtgate_server, vtgate_port = utils.vtgate_start()
    # FIXME(shrutip): this should be removed once vtgate_cursor's
    # dependency on topology goes away.
    vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                          "test_nj", 30.0)
    topology.read_topology(vtgate_client)
  except:
    tearDownModule()
    raise
Esempio n. 38
0
def tearDownModule():
    if utils.options.skip_teardown:
        return

    teardown_procs = [
        source_master.teardown_mysql(),
        source_replica.teardown_mysql(),
        source_rdonly.teardown_mysql(),
        destination_master.teardown_mysql(),
        destination_replica.teardown_mysql(),
        destination_rdonly.teardown_mysql(),
    ]
    utils.wait_procs(teardown_procs, raise_on_error=False)

    environment.topo_server_teardown()
    utils.kill_sub_processes()
    utils.remove_tmp_files()

    source_master.remove_tree()
    source_replica.remove_tree()
    source_rdonly.remove_tree()
    destination_master.remove_tree()
    destination_replica.remove_tree()
    destination_rdonly.remove_tree()
Esempio n. 39
0
def tearDownModule():
    logging.debug('in tearDownModule')
    if utils.options.skip_teardown:
        return
    logging.debug('Tearing down the servers and setup')
    tablet.kill_tablets(
        [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica])
    teardown_procs = [
        shard_0_master.teardown_mysql(),
        shard_0_replica.teardown_mysql(),
        shard_1_master.teardown_mysql(),
        shard_1_replica.teardown_mysql(),
    ]
    utils.wait_procs(teardown_procs, raise_on_error=False)

    environment.topo_server().teardown()

    utils.kill_sub_processes()
    utils.remove_tmp_files()

    shard_0_master.remove_tree()
    shard_0_replica.remove_tree()
    shard_1_master.remove_tree()
    shard_1_replica.remove_tree()
Esempio n. 40
0
def tearDownModule():
    if utils.options.skip_teardown:
        return

    teardown_procs = [
        shard_0_master.teardown_mysql(),
        shard_0_replica.teardown_mysql(),
        shard_1_master.teardown_mysql(),
        shard_1_slave1.teardown_mysql(),
        shard_1_slave2.teardown_mysql(),
        shard_1_rdonly.teardown_mysql(),
        shard_2_master.teardown_mysql(),
        shard_2_replica1.teardown_mysql(),
        shard_2_replica2.teardown_mysql(),
        shard_3_master.teardown_mysql(),
        shard_3_replica.teardown_mysql(),
        shard_3_rdonly.teardown_mysql(),
    ]
    utils.wait_procs(teardown_procs, raise_on_error=False)

    environment.topo_server_teardown()
    utils.kill_sub_processes()
    utils.remove_tmp_files()

    shard_0_master.remove_tree()
    shard_0_replica.remove_tree()
    shard_1_master.remove_tree()
    shard_1_slave1.remove_tree()
    shard_1_slave2.remove_tree()
    shard_1_rdonly.remove_tree()
    shard_2_master.remove_tree()
    shard_2_replica1.remove_tree()
    shard_2_replica2.remove_tree()
    shard_3_master.remove_tree()
    shard_3_replica.remove_tree()
    shard_3_rdonly.remove_tree()
Esempio n. 41
0
def tearDownModule():
  if utils.options.skip_teardown:
    return

  global vtgate_server
  utils.vtgate_kill(vtgate_server)
  tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                      shard_1_master, shard_1_replica, shard_1_rdonly])
  teardown_procs = [
      shard_0_master.teardown_mysql(),
      shard_0_replica.teardown_mysql(),
      shard_0_rdonly.teardown_mysql(),
      shard_1_master.teardown_mysql(),
      shard_1_replica.teardown_mysql(),
      shard_1_rdonly.teardown_mysql(),
      unsharded_master.teardown_mysql(),
      unsharded_replica.teardown_mysql(),
      unsharded_rdonly.teardown_mysql(),
      ]
  utils.wait_procs(teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  shard_0_master.remove_tree()
  shard_0_replica.remove_tree()
  shard_0_rdonly.remove_tree()
  shard_1_master.remove_tree()
  shard_1_replica.remove_tree()
  shard_1_rdonly.remove_tree()
  shard_1_rdonly.remove_tree()
  shard_1_rdonly.remove_tree()
  unsharded_master.remove_tree()
  unsharded_replica.remove_tree()
  unsharded_rdonly.remove_tree()
Esempio n. 42
0
def setUpModule():
  try:
    environment.topo_server().setup()

    # start mysql instance external to the test
    utils.wait_procs([t.init_mysql() for t in all_tablets])

    # start a vtctld so the vtctl insert commands are just RPCs, not forks
    utils.Vtctld().start()

    # Start up a master mysql and vttablet
    logging.debug('Setting up tablets')
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    master_tablet.init_tablet('master', 'test_keyspace', '0')
    replica_tablet.init_tablet('replica', 'test_keyspace', '0')
    replica2_tablet.init_tablet('replica', 'test_keyspace', '0')
    utils.validate_topology()

    for t in all_tablets:
      t.populate('vt_test_keyspace', create_vt_insert_test)

    for t in all_tablets:
      t.start_vttablet(memcache=True, wait_for_state=None)
    for t in all_tablets:
      t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/0',
                     master_tablet.tablet_alias], auto_log=True)
    utils.validate_topology()

    # restart the replica tablet so the stats are reset
    replica_tablet.kill_vttablet()
    replica_tablet.start_vttablet(memcache=True)
  except:
    tearDownModule()
    raise
Esempio n. 43
0
def _init_mysql(tablets):
    setup_procs = []
    for t in tablets:
        setup_procs.append(t.init_mysql())
    utils.wait_procs(setup_procs)
Esempio n. 44
0
def setUpModule():
    global master_start_position

    try:
        environment.topo_server().setup()

        # start mysql instance external to the test
        setup_procs = [master_tablet.init_mysql(), replica_tablet.init_mysql()]
        utils.wait_procs(setup_procs)

        # start a vtctld so the vtctl insert commands are just RPCs, not forks
        utils.Vtctld().start()

        # Start up a master mysql and vttablet
        logging.debug('Setting up tablets')
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
        master_tablet.init_tablet('master',
                                  'test_keyspace',
                                  '0',
                                  tablet_index=0)
        replica_tablet.init_tablet('replica',
                                   'test_keyspace',
                                   '0',
                                   tablet_index=1)
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.validate_topology()
        master_tablet.create_db('vt_test_keyspace')
        master_tablet.create_db('other_database')
        replica_tablet.create_db('vt_test_keyspace')
        replica_tablet.create_db('other_database')

        master_tablet.start_vttablet(wait_for_state=None)
        replica_tablet.start_vttablet(wait_for_state=None)
        master_tablet.wait_for_vttablet_state('SERVING')
        replica_tablet.wait_for_vttablet_state('NOT_SERVING')

        for t in [master_tablet, replica_tablet]:
            t.reset_replication()
        utils.run_vtctl(
            ['InitShardMaster', 'test_keyspace/0', master_tablet.tablet_alias],
            auto_log=True)

        utils.wait_for_tablet_type(replica_tablet.tablet_alias, 'replica')
        master_tablet.wait_for_vttablet_state('SERVING')
        replica_tablet.wait_for_vttablet_state('SERVING')

        # reset counter so tests don't assert
        tablet.Tablet.tablets_running = 0

        master_start_position = _get_master_current_position()
        master_tablet.mquery('vt_test_keyspace', _create_vt_insert_test)
        master_tablet.mquery('vt_test_keyspace', _create_vt_a)
        master_tablet.mquery('vt_test_keyspace', _create_vt_b)

        utils.run_vtctl(['ReloadSchema', master_tablet.tablet_alias])
        utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias])
        utils.run_vtctl(['RebuildVSchemaGraph'])

        utils.VtGate().start(tablets=[master_tablet, replica_tablet])
        utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)

        # Wait for the master and slave tablet's ReloadSchema to have worked.
        # Note we don't specify a keyspace name, there is only one, vschema
        # will just use that single keyspace.
        timeout = 10
        while True:
            try:
                utils.vtgate.execute('select count(1) from vt_insert_test',
                                     tablet_type='master')
                utils.vtgate.execute('select count(1) from vt_insert_test',
                                     tablet_type='replica')
                break
            except protocols_flavor().client_error_exception_type():
                logging.exception('query failed')
                timeout = utils.wait_step('slave tablet having correct schema',
                                          timeout)
                # also re-run ReloadSchema on slave, it case the first one
                # didn't get the replicated table.
                utils.run_vtctl(['ReloadSchema', replica_tablet.tablet_alias])

    except:
        tearDownModule()
        raise
Esempio n. 45
0
    with self.assertRaises(dbexceptions.TimeoutError):
      replica_conn._execute("select sleep(12) from dual", {})

    try:
      master_conn = get_connection(db_type='master')
    except Exception, e:
      self.fail("Connection to shard0 master failed with error %s" % str(e))
    with self.assertRaises(dbexceptions.TimeoutError):
      master_conn._execute("select sleep(12) from dual", {})

  def test_restart_mysql_failure(self):
    try:
      replica_conn = get_connection(db_type='replica', shard_index=self.shard_index)
    except Exception, e:
      self.fail("Connection to shard0 replica failed with error %s" % str(e))
    utils.wait_procs([self.replica_tablet.shutdown_mysql(),])
    with self.assertRaises(dbexceptions.DatabaseError):
      replica_conn._execute("select 1 from vt_insert_test", {})
    utils.wait_procs([self.replica_tablet.start_mysql(),])
    self.replica_tablet.kill_vttablet()
    self.replica_tablet.start_vttablet()
    replica_conn._execute("select 1 from vt_insert_test", {})

  def test_retry_txn_pool_full(self):
    master_conn = get_connection(db_type='master')
    master_conn._execute("set vt_transaction_cap=1", {})
    master_conn.begin()
    with self.assertRaises(dbexceptions.OperationalError):
      master_conn2 = get_connection(db_type='master')
      master_conn2.begin()
    master_conn.commit()
Esempio n. 46
0
  def test_query_timeout(self):
    try:
      replica_conn = get_replica_connection()
    except Exception, e:
      self.fail("Connection to shard0 replica failed with error %s" % str(e))
    with self.assertRaises(tablet3.TimeoutError):
      replica_conn._execute("select sleep(12) from dual", {})

    try:
      master_conn = get_master_connection()
    except Exception, e:
      self.fail("Connection to shard0 master failed with error %s" % str(e))
    with self.assertRaises(tablet3.TimeoutError):
      master_conn._execute("select sleep(12) from dual", {})

  def test_mysql_failure(self):
    try:
      replica_conn = get_replica_connection()
    except Exception, e:
      self.fail("Connection to shard0 replica failed with error %s" % str(e))
    utils.wait_procs([shard_0_replica.shutdown_mysql(),])
    with self.assertRaises(tablet3.FatalError):
      replica_conn._execute("select 1 from vt_insert_test", {})
    utils.wait_procs([shard_0_replica.start_mysql(),])
    shard_0_replica.kill_vttablet()
    shard_0_replica.start_vttablet()


if __name__ == '__main__':
  utils.main()
Esempio n. 47
0
    def test_resharding(self):
        # create the keyspace with just one shard
        shard_master.init_tablet('replica',
                                 keyspace='test_keyspace',
                                 shard='0',
                                 tablet_index=0)
        shard_replica.init_tablet('replica',
                                  keyspace='test_keyspace',
                                  shard='0',
                                  tablet_index=1)
        shard_rdonly1.init_tablet('rdonly',
                                  keyspace='test_keyspace',
                                  shard='0',
                                  tablet_index=2)

        for t in [shard_master, shard_replica, shard_rdonly1]:
            t.create_db('vt_test_keyspace')

        # replica is not started, InitShardMaster should timeout
        shard_master.start_vttablet(wait_for_state=None,
                                    binlog_use_v3_resharding_mode=False)
        shard_rdonly1.start_vttablet(wait_for_state=None,
                                     binlog_use_v3_resharding_mode=False)

        for t in [shard_master, shard_rdonly1]:
            t.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work - expect fail
        # because replica tablet is not up
        _, stderr = utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/0',
            shard_master.tablet_alias
        ],
                                    auto_log=True,
                                    expect_fail=True)

        self.assertIn('tablet test_nj-0000062345 ResetReplication failed',
                      stderr)
        # start replica
        shard_replica.start_vttablet(wait_for_state=None,
                                     binlog_use_v3_resharding_mode=False)

        shard_replica.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/0',
            shard_master.tablet_alias
        ],
                        auto_log=True)

        utils.wait_for_tablet_type(shard_replica.tablet_alias, 'replica')
        utils.wait_for_tablet_type(shard_rdonly1.tablet_alias, 'rdonly')
        for t in [shard_master, shard_replica, shard_rdonly1]:
            t.wait_for_vttablet_state('SERVING')

        # create the tables and add startup values
        self._create_schema()
        self._insert_startup_values()

        # reload schema on all tablets so we can query them
        for t in [shard_master, shard_replica, shard_rdonly1]:
            utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True)

        # We must start vtgate after tablets are up, or else wait until 1min refresh
        # (that is the tablet_refresh_interval parameter for discovery gateway)
        # we want cache_ttl at zero so we re-read the topology for every test query.

        utils.VtGate().start(
            cache_ttl='0',
            tablets=[shard_master, shard_replica, shard_rdonly1])
        utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1)

        # check the Map Reduce API works correctly, should use ExecuteShards,
        # as we're not sharded yet.
        # we have 3 values in the database, asking for 4 splits will get us
        # a single query.
        sql = 'select id, msg from resharding1'
        s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
        self.assertEqual(len(s), 1)
        self.assertEqual(s[0]['shard_part']['shards'][0], '0')

        # change the schema, backfill keyspace_id, and change schema again
        self._add_sharding_key_to_schema()
        self._backfill_keyspace_id(shard_master)
        self._mark_sharding_key_not_null()

        # now we can be a sharded keyspace (and propagate to SrvKeyspace)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col',
            base_sharding.keyspace_id_type
        ])
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # run a health check on source replica so it responds to discovery
        utils.run_vtctl(['RunHealthCheck', shard_replica.tablet_alias])

        # create the split shards
        shard_0_master.init_tablet('replica',
                                   keyspace='test_keyspace',
                                   shard='-80',
                                   tablet_index=0)
        shard_0_replica.init_tablet('replica',
                                    keyspace='test_keyspace',
                                    shard='-80',
                                    tablet_index=1)
        shard_0_rdonly1.init_tablet('rdonly',
                                    keyspace='test_keyspace',
                                    shard='-80',
                                    tablet_index=2)
        shard_1_master.init_tablet('replica',
                                   keyspace='test_keyspace',
                                   shard='80-',
                                   tablet_index=0)
        shard_1_replica.init_tablet('replica',
                                    keyspace='test_keyspace',
                                    shard='80-',
                                    tablet_index=1)
        shard_1_rdonly1.init_tablet('rdonly',
                                    keyspace='test_keyspace',
                                    shard='80-',
                                    tablet_index=2)

        for t in [
                shard_0_master, shard_0_replica, shard_0_rdonly1,
                shard_1_master, shard_1_replica, shard_1_rdonly1
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             binlog_use_v3_resharding_mode=False)

        for t in [
                shard_0_master, shard_0_replica, shard_0_rdonly1,
                shard_1_master, shard_1_replica, shard_1_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        for t in [shard_0_replica, shard_1_replica]:
            utils.wait_for_tablet_type(t.tablet_alias, 'replica')
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            utils.wait_for_tablet_type(t.tablet_alias, 'rdonly')

        sharded_tablets = [
            shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master,
            shard_1_replica, shard_1_rdonly1
        ]
        for t in sharded_tablets:
            t.wait_for_vttablet_state('SERVING')

        # must restart vtgate after tablets are up, or else wait until 1min refresh
        # we want cache_ttl at zero so we re-read the topology for every test query.
        utils.vtgate.kill()

        utils.vtgate = None
        utils.VtGate().start(cache_ttl='0',
                             tablets=[
                                 shard_master, shard_replica, shard_rdonly1,
                                 shard_0_master, shard_0_replica,
                                 shard_0_rdonly1, shard_1_master,
                                 shard_1_replica, shard_1_rdonly1
                             ])
        var = None

        # Wait for the endpoints, either local or remote.
        utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.replica',
                                        1,
                                        var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.replica',
                                        1,
                                        var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1, var=var)

        # check the Map Reduce API works correctly, should use ExecuteKeyRanges now,
        # as we are sharded (with just one shard).
        # again, we have 3 values in the database, asking for 4 splits will get us
        # a single query.
        sql = 'select id, msg from resharding1'
        s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
        self.assertEqual(len(s), 1)
        self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
        # There must be one empty KeyRange which represents the full keyspace.
        self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
        self.assertEqual(s[0]['key_range_part']['key_ranges'][0], {})

        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -\n'
            'Partitions(replica): -\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)
        utils.run_vtctl(['RunHealthCheck', shard_rdonly1.tablet_alias])

        # Run vtworker as daemon for the following SplitClone commands.
        worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
            [
                '--cell', 'test_nj', '--command_display_interval', '10ms',
                '--use_v3_resharding_mode=false'
            ],
            auto_log=True)

        # Initial clone (online).
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 3, 0, 0, 0)

        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Modify the destination shard. SplitClone will revert the changes.
        # Delete row 1 (provokes an insert).
        shard_0_master.mquery('vt_test_keyspace',
                              'delete from resharding1 where id=1',
                              write=True)
        # Delete row 2 (provokes an insert).
        shard_1_master.mquery('vt_test_keyspace',
                              'delete from resharding1 where id=2',
                              write=True)
        # Update row 3 (provokes an update).
        shard_1_master.mquery(
            'vt_test_keyspace',
            "update resharding1 set msg='msg-not-3' where id=3",
            write=True)
        # Insert row 4 (provokes a delete).
        self._insert_value(shard_1_master, 'resharding1', 4, 'msg4',
                           0xD000000000000000)

        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--exclude_tables', 'unrelated', '--chunk_count',
            '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets',
            '1', 'test_keyspace/0'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 2, 1, 1, 0)
        self.verify_reconciliation_counters(worker_port, 'Offline',
                                            'resharding1', 0, 0, 0, 3)
        # Terminate worker daemon because it is no longer needed.
        utils.kill_sub_process(worker_proc, soft=True)

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                        auto_log=True)

        # check the binlog players are running
        logging.debug('Waiting for binlog players to start on new masters...')
        self.check_destination_master(shard_0_master, ['test_keyspace/0'])
        self.check_destination_master(shard_1_master, ['test_keyspace/0'])

        # check that binlog server exported the stats vars
        self.check_binlog_server_vars(shard_replica, horizontal=True)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 5)
        if v != 100:
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 20)
        logging.debug('Checking no data was sent the wrong way')
        self._check_lots_not_present(1000)
        self.check_binlog_player_vars(shard_0_master, ['test_keyspace/0'],
                                      seconds_behind_master_max=30)
        self.check_binlog_player_vars(shard_1_master, ['test_keyspace/0'],
                                      seconds_behind_master_max=30)
        self.check_binlog_server_vars(shard_replica,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)

        # use vtworker to compare the data
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

        if base_sharding.use_multi_split_diff:
            logging.debug('Running vtworker MultiSplitDiff for 0')
            utils.run_vtworker([
                '-cell', 'test_nj', '--use_v3_resharding_mode=false',
                'MultiSplitDiff', '--min_healthy_rdonly_tablets', '1',
                'test_keyspace/0'
            ],
                               auto_log=True)
        else:
            logging.debug('Running vtworker SplitDiff for -80')
            utils.run_vtworker([
                '-cell', 'test_nj', '--use_v3_resharding_mode=false',
                'SplitDiff', '--min_healthy_rdonly_tablets', '1',
                'test_keyspace/-80'
            ],
                               auto_log=True)
            logging.debug('Running vtworker SplitDiff for 80-')
            utils.run_vtworker([
                '-cell', 'test_nj', '--use_v3_resharding_mode=false',
                'SplitDiff', '--min_healthy_rdonly_tablets', '1',
                'test_keyspace/80-'
            ],
                               auto_log=True)

        utils.pause('Good time to test vtworker for diffs')

        # get status for the destination master tablet, make sure we have it all
        self.check_running_binlog_player(shard_0_master, 2000, 2000)
        self.check_running_binlog_player(shard_1_master, 6000, 2000)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        expect_fail=True)

        # now serve rdonly from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # make sure rdonly tablets are back to serving before hitting vtgate.
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            t.wait_for_vttablet_state('SERVING')

        utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1)

        # check the Map Reduce API works correctly, should use ExecuteKeyRanges
        # on both destination shards now.
        # we ask for 2 splits to only have one per shard
        sql = 'select id, msg from resharding1'
        timeout = 10.0
        while True:
            try:
                s = utils.vtgate.split_query(sql, 'test_keyspace', 2)
                break
            except Exception:  # pylint: disable=broad-except
                timeout = utils.wait_step(
                    'vtgate executes split_query properly', timeout)
        self.assertEqual(len(s), 2)
        self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
        self.assertEqual(s[1]['key_range_part']['keyspace'], 'test_keyspace')
        self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
        self.assertEqual(len(s[1]['key_range_part']['key_ranges']), 1)

        # then serve replica from the split shards
        source_tablet = shard_replica
        destination_tablets = [shard_0_replica, shard_1_replica]

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
            auto_log=True)
        # After a backwards migration, queryservice should be enabled on
        # source and disabled on destinations
        utils.check_tablet_query_service(self, source_tablet, True, False)
        utils.check_tablet_query_services(self, destination_tablets, False,
                                          True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, source_tablet, False, True)
        utils.check_tablet_query_services(self, destination_tablets, True,
                                          False)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # then serve master from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # check the binlog players are gone now
        self.check_no_binlog_player(shard_0_master)
        self.check_no_binlog_player(shard_1_master)

        # make sure we can't delete a shard with tablets
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

        # remove the original tablets in the original shard
        tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1])
        for t in [shard_replica, shard_rdonly1]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        utils.run_vtctl(
            ['DeleteTablet', '-allow_master', shard_master.tablet_alias],
            auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

        # kill everything else
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master,
            shard_1_replica, shard_1_rdonly1
        ])
Esempio n. 48
0
  def verify_successful_worker_copy_with_reparent(self, mysql_down=False):
    """Verifies that vtworker can successfully copy data for a SplitClone.

    Order of operations:
    1. Run a background vtworker
    2. Wait until the worker successfully resolves the destination masters.
    3. Reparent the destination tablets
    4. Wait until the vtworker copy is finished
    5. Verify that the worker was forced to reresolve topology and retry writes
      due to the reparent.
    6. Verify that the data was copied successfully to both new shards

    Args:
      mysql_down: boolean. If True, we take down the MySQL instances on the
        destination masters at first, then bring them back and reparent away.

    Raises:
      AssertionError if things didn't go as expected.
    """
    if mysql_down:
      logging.debug('Shutting down mysqld on destination masters.')
      utils.wait_procs(
          [shard_0_master.shutdown_mysql(),
           shard_1_master.shutdown_mysql()])

    worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
        ['--cell', 'test_nj'],
        auto_log=True)

    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--source_reader_count', '1',
         '--destination_pack_count', '1',
         '--destination_writer_count', '1',
         'test_keyspace/0'],
        worker_rpc_port)

    if mysql_down:
      # If MySQL is down, we wait until resolving at least twice (to verify that
      # we do reresolve and retry due to MySQL being down).
      worker_vars = utils.poll_for_vars(
          'vtworker', worker_port,
          'WorkerDestinationActualResolves >= 2',
          condition_fn=lambda v: v.get('WorkerDestinationActualResolves') >= 2)
      self.assertNotEqual(
          worker_vars['WorkerRetryCount'], {},
          "expected vtworker to retry, but it didn't")
      logging.debug('Worker has resolved at least twice, starting reparent now')

      # Bring back masters. Since we test with semi-sync now, we need at least
      # one replica for the new master. This test is already quite expensive,
      # so we bring back the old master as a replica rather than having a third
      # replica up the whole time.
      logging.debug('Restarting mysqld on destination masters')
      utils.wait_procs(
          [shard_0_master.start_mysql(),
           shard_1_master.start_mysql()])

      # Reparent away from the old masters.
      utils.run_vtctl(
          ['PlannedReparentShard', 'test_keyspace/-80',
           shard_0_replica.tablet_alias], auto_log=True)
      utils.run_vtctl(
          ['PlannedReparentShard', 'test_keyspace/80-',
           shard_1_replica.tablet_alias], auto_log=True)

    else:
      # NOTE: There is a race condition around this:
      #   It's possible that the SplitClone vtworker command finishes before the
      #   PlannedReparentShard vtctl command, which we start below, succeeds.
      #   Then the test would fail because vtworker did not have to resolve the
      #   master tablet again (due to the missing reparent).
      #
      # To workaround this, the test takes a parameter to increase the number of
      # rows that the worker has to copy (with the idea being to slow the worker
      # down).
      # You should choose a value for num_insert_rows, such that this test
      # passes for your environment (trial-and-error...)
      utils.poll_for_vars(
          'vtworker', worker_port,
          'WorkerDestinationActualResolves >= 1',
          condition_fn=lambda v: v.get('WorkerDestinationActualResolves') >= 1)
      logging.debug('Worker has resolved at least once, starting reparent now')

      utils.run_vtctl(
          ['PlannedReparentShard', 'test_keyspace/-80',
           shard_0_replica.tablet_alias], auto_log=True)
      utils.run_vtctl(
          ['PlannedReparentShard', 'test_keyspace/80-',
           shard_1_replica.tablet_alias], auto_log=True)

    utils.wait_procs([workerclient_proc])

    # Verify that we were forced to reresolve and retry.
    worker_vars = utils.get_vars(worker_port)
    self.assertGreater(worker_vars['WorkerDestinationActualResolves'], 1)
    self.assertGreater(worker_vars['WorkerDestinationAttemptedResolves'], 1)
    self.assertNotEqual(worker_vars['WorkerRetryCount'], {},
                        "expected vtworker to retry, but it didn't")
    utils.kill_sub_process(worker_proc, soft=True)

    # Make sure that everything is caught up to the same replication point
    self.run_split_diff('test_keyspace/-80', all_shard_tablets, shard_0_tablets)
    self.run_split_diff('test_keyspace/80-', all_shard_tablets, shard_1_tablets)

    self.assert_shard_data_equal(0, shard_master, shard_0_tablets.replica)
    self.assert_shard_data_equal(1, shard_master, shard_1_tablets.replica)
Esempio n. 49
0
  def test_no_mysql_healthcheck(self):
    """This test starts a vttablet with no mysql port, while mysql is down.

    It makes sure vttablet will start properly and be unhealthy.
    Then we start mysql, and make sure vttablet becomes healthy.
    """
    # we need replication to be enabled, so the slave tablet can be healthy.
    for t in tablet_62344, tablet_62044:
      t.create_db('vt_test_keyspace')
    pos = mysql_flavor().master_position(tablet_62344)
    # Use 'localhost' as hostname because Travis CI worker hostnames
    # are too long for MySQL replication.
    change_master_cmds = mysql_flavor().change_master_commands(
        'localhost',
        tablet_62344.mysql_port,
        pos)
    tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                        change_master_cmds + ['START SLAVE'])

    # now shutdown all mysqld
    shutdown_procs = [
        tablet_62344.shutdown_mysql(),
        tablet_62044.shutdown_mysql(),
        ]
    utils.wait_procs(shutdown_procs)

    # start the tablets, wait for them to be NOT_SERVING (mysqld not there)
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('replica', 'test_keyspace', '0',
                             include_mysql_port=False)
    for t in tablet_62344, tablet_62044:
      # Since MySQL is down at this point and we want the tablet to start up
      # successfully, we have to use supports_backups=False.
      t.start_vttablet(wait_for_state=None, supports_backups=False,
                       full_mycnf_args=True, include_mysql_port=False)
    for t in tablet_62344, tablet_62044:
      t.wait_for_vttablet_state('NOT_SERVING')
      self.check_healthz(t, False)

    # Tell slave to not try to repair replication in healthcheck.
    # The StopSlave will ultimately fail because mysqld is not running,
    # But vttablet should remember that it's not supposed to fix replication.
    utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias], expect_fail=True)

    # The above notice to not fix replication should survive tablet restart.
    tablet_62044.kill_vttablet()
    tablet_62044.start_vttablet(wait_for_state='NOT_SERVING',
                                full_mycnf_args=True, include_mysql_port=False,
                                supports_backups=False)

    # restart mysqld
    start_procs = [
        tablet_62344.start_mysql(),
        tablet_62044.start_mysql(),
        ]
    utils.wait_procs(start_procs)

    # the master should still be healthy
    utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias],
                    auto_log=True)
    self.check_healthz(tablet_62344, True)

    # the slave will now be healthy, but report a very high replication
    # lag, because it can't figure out what it exactly is.
    utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias],
                    auto_log=True)
    tablet_62044.wait_for_vttablet_state('SERVING')
    self.check_healthz(tablet_62044, True)

    health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                   '-count', '1',
                                   tablet_62044.tablet_alias])
    self.assertIn('seconds_behind_master', health['realtime_stats'])
    self.assertEqual(health['realtime_stats']['seconds_behind_master'], 7200)
    self.assertIn('serving', health)

    # restart replication, wait until health check goes small
    # (a value of zero is default and won't be in structure)
    utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
    timeout = 10
    while True:
      utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias],
                      auto_log=True)
      health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                     '-count', '1',
                                     tablet_62044.tablet_alias])
      if 'serving' in health and (
          ('seconds_behind_master' not in health['realtime_stats']) or
          (health['realtime_stats']['seconds_behind_master'] < 30)):
        break
      timeout = utils.wait_step('health delay goes back down', timeout)

    # wait for the tablet to fix its mysql port
    for t in tablet_62344, tablet_62044:
      # wait for mysql port to show up
      timeout = 10
      while True:
        ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
        if 'mysql' in ti['port_map']:
          break
        timeout = utils.wait_step('mysql port in tablet record', timeout)
      self.assertEqual(ti['port_map']['mysql'], t.mysql_port)

    # all done
    tablet.kill_tablets([tablet_62344, tablet_62044])
Esempio n. 50
0
  def test_no_mysql_healthcheck(self):
    """This test starts a vttablet with no mysql port, while mysql is down.
    It makes sure vttablet will start properly and be unhealthy.
    Then we start mysql, and make sure vttablet becomes healthy.
    """
    # we need replication to be enabled, so the slave tablet can be healthy.
    for t in tablet_62344, tablet_62044:
      t.create_db('vt_test_keyspace')
    pos = mysql_flavor().master_position(tablet_62344)
    changeMasterCmds = mysql_flavor().change_master_commands(
                            utils.hostname,
                            tablet_62344.mysql_port,
                            pos)
    tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                        changeMasterCmds +
                        ['START SLAVE'])

    # now shutdown all mysqld
    shutdown_procs = [
        tablet_62344.shutdown_mysql(),
        tablet_62044.shutdown_mysql(),
        ]
    utils.wait_procs(shutdown_procs)

    # start the tablets, wait for them to be NOT_SERVING (mysqld not there)
    tablet_62344.init_tablet('master', 'test_keyspace', '0')
    tablet_62044.init_tablet('spare', 'test_keyspace', '0',
                             include_mysql_port=False)
    for t in tablet_62344, tablet_62044:
      t.start_vttablet(wait_for_state=None,
                       target_tablet_type='replica',
                       full_mycnf_args=True, include_mysql_port=False)
    for t in tablet_62344, tablet_62044:
      t.wait_for_vttablet_state('NOT_SERVING')
      self.check_healthz(t, False)

    # restart mysqld
    start_procs = [
        tablet_62344.start_mysql(),
        tablet_62044.start_mysql(),
        ]
    utils.wait_procs(start_procs)

    # the master should still be healthy
    utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias, 'replica'],
                      auto_log=True)
    self.check_healthz(tablet_62344, True)
    
    # the slave won't be healthy at first, as replication is not running
    utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
                      auto_log=True)
    self.check_healthz(tablet_62044, False)
    tablet_62044.wait_for_vttablet_state('NOT_SERVING')

    # restart replication
    tablet_62044.mquery('', ['START SLAVE'])

    # wait for the tablet to become healthy and fix its mysql port
    utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias, 'replica'],
                    auto_log=True)
    tablet_62044.wait_for_vttablet_state('SERVING')
    self.check_healthz(tablet_62044, True)

    for t in tablet_62344, tablet_62044:
      # wait for mysql port to show up
      timeout = 10
      while True:
        ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
        if 'mysql' in ti['Portmap']:
          break
        timeout = utils.wait_step('mysql port in tablet record', timeout)
      self.assertEqual(ti['Portmap']['mysql'], t.mysql_port)

    # all done
    tablet.kill_tablets([tablet_62344, tablet_62044])
Esempio n. 51
0
    def test_reparent_with_down_slave(self, shard_id='0'):
        """See if a missing slave can be safely reparented after the fact."""
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

        # create the database so vttablets start, as they are serving
        tablet_62344.create_db('vt_test_keyspace')
        tablet_62044.create_db('vt_test_keyspace')
        tablet_41983.create_db('vt_test_keyspace')
        tablet_31981.create_db('vt_test_keyspace')

        # Start up a master mysql and vttablet
        tablet_62344.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)

        # Create a few slaves for testing reparenting.
        tablet_62044.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_31981.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)
        tablet_41983.init_tablet('replica',
                                 'test_keyspace',
                                 shard_id,
                                 start=True,
                                 wait_for_start=False)

        # wait for all tablets to start
        for t in [tablet_62344, tablet_62044, tablet_31981, tablet_41983]:
            t.wait_for_vttablet_state('NOT_SERVING')

        # Force the slaves to reparent assuming that all the datasets are identical.
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/' + shard_id,
            tablet_62344.tablet_alias
        ])
        utils.validate_topology(ping_tablets=True)
        tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

        utils.wait_procs([tablet_41983.shutdown_mysql()])

        # Perform a graceful reparent operation. It will fail as one tablet is down.
        _, stderr = utils.run_vtctl([
            'PlannedReparentShard', '-keyspace_shard', 'test_keyspace/' +
            shard_id, '-new_master', tablet_62044.tablet_alias
        ],
                                    expect_fail=True)
        self.assertIn('TabletManager.SetMaster on test_nj-0000041983 error',
                      stderr)

        # insert data into the new master, check the connected slaves work
        self._populate_vt_insert_test(tablet_62044, 3)
        self._check_vt_insert_test(tablet_31981, 3)
        self._check_vt_insert_test(tablet_62344, 3)

        # restart mysql on the old slave, should still be connecting to the
        # old master
        utils.wait_procs([tablet_41983.start_mysql()])

        utils.pause('check orphan')

        # reparent the tablet (will not start replication, so we have to
        # do it ourselves), then it should catch up on replication really quickly
        utils.run_vtctl(['ReparentTablet', tablet_41983.tablet_alias])
        utils.run_vtctl(['StartSlave', tablet_41983.tablet_alias])

        # wait until it gets the data
        self._check_vt_insert_test(tablet_41983, 3)

        tablet.kill_tablets(
            [tablet_62344, tablet_62044, tablet_41983, tablet_31981])
Esempio n. 52
0
    def setUp(self):
        utils.zk_setup()
        utils.setup()
        if self.vttop is None:
            raise EnvironmentError("VTTOP not defined")
        if self.vtroot is None:
            raise EnvironmentError("VTROOT not defined")

        framework.execute('go build',
                          verbose=utils.options.verbose,
                          cwd=self.vttop + '/go/cmd/mysqlctl')

        utils.wait_procs([self.tablet.init_mysql()])
        self.tablet.mquery(
            "",
            ["create database vt_test_keyspace", "set global read_only = off"])

        self.mysql_conn, mcu = self.tablet.connect('vt_test_keyspace')
        self.clean_sqls = []
        self.init_sqls = []
        clean_mode = False
        with open(
                os.path.join(self.vttop, "test", "test_data",
                             "test_schema.sql")) as f:
            for line in f:
                line = line.rstrip()
                if line == "# clean":
                    clean_mode = True
                if line == '' or line.startswith("#"):
                    continue
                if clean_mode:
                    self.clean_sqls.append(line)
                else:
                    self.init_sqls.append(line)
        try:
            for line in self.init_sqls:
                mcu.execute(line, {})
        finally:
            mcu.close()

        utils.run_vtctl(
            'CreateKeyspace -force /zk/global/vt/keyspaces/test_keyspace')
        self.tablet.init_tablet('master', 'test_keyspace', '0')

        customrules = '/tmp/customrules.json'
        self.create_customrules(customrules)
        schema_override = '/tmp/schema_override.json'
        self.create_schema_override(schema_override)
        if utils.options.memcache:
            self.tablet.start_vttablet(memcache=True,
                                       customrules=customrules,
                                       schema_override=schema_override)
        else:
            self.tablet.start_vttablet(customrules=customrules,
                                       schema_override=schema_override)

        # FIXME(szopa): This is necessary here only because of a bug that
        # makes the qs reload its config only after an action.
        utils.run_vtctl('Ping ' + self.tablet.zk_tablet_path)

        for i in range(30):
            try:
                self.conn = self.connect()
                self.txlogger = subprocess.Popen(
                    ['curl', '-s', '-N', 'http://localhost:9461/debug/txlog'],
                    stdout=open('/tmp/vtocc_txlog.log', 'w'))
                self.txlog = framework.Tailer(open('/tmp/vtocc_txlog.log'),
                                              flush=self.tablet.flush)
                self.log = framework.Tailer(open(
                    os.path.join(self.tablet.tablet_dir, 'vttablet.INFO')),
                                            flush=self.tablet.flush)
                querylog_file = '/tmp/vtocc_streamlog_%s.log' % self.tablet.port
                utils.run_bg([
                    'curl', '-s', '-N',
                    'http://localhost:9461/debug/querylog?full=true'
                ],
                             stdout=open(querylog_file, 'w'))
                time.sleep(1)
                self.querylog = framework.Tailer(open(querylog_file),
                                                 sleep=0.1)

                return
            except dbexceptions.OperationalError:
                if i == 29:
                    raise
                time.sleep(1)
Esempio n. 53
0
    def setUp(self):
        self.master = tablet.Tablet()
        self.replica = tablet.Tablet()
        self.all_tablets = [self.master, self.replica]

        try:
            environment.topo_server().setup()

            setup_procs = [t.init_mysql() for t in self.all_tablets]
            utils.Vtctld().start()
            utils.wait_procs(setup_procs)

            utils.run_vtctl(['CreateKeyspace', KEYSPACE])

            # Start tablets.
            db_name = 'vt_' + KEYSPACE
            for t in self.all_tablets:
                t.create_db(db_name)
            self.master.start_vttablet(wait_for_state=None,
                                       init_tablet_type='replica',
                                       init_keyspace=KEYSPACE,
                                       init_shard=SHARD,
                                       tablet_index=0)
            self.replica.start_vttablet(wait_for_state=None,
                                        init_tablet_type='replica',
                                        init_keyspace=KEYSPACE,
                                        init_shard=SHARD,
                                        tablet_index=1)
            for t in self.all_tablets:
                t.wait_for_vttablet_state('NOT_SERVING')

            # Reparent to choose an initial master and enable replication.
            utils.run_vtctl([
                'InitShardMaster', '-force',
                '%s/%s' % (KEYSPACE, SHARD), self.master.tablet_alias
            ])

            # Create the schema.
            utils.run_vtctl(['ApplySchema', '-sql=' + SCHEMA, KEYSPACE])

            # Start vtgate.
            utils.VtGate().start(
                extra_args=[
                    '-enable_vtgate_buffer',
                    # Long timeout in case failover is slow.
                    '-vtgate_buffer_window',
                    '10m',
                    '-vtgate_buffer_max_failover_duration',
                    '10m',
                    '-vtgate_buffer_min_time_between_failovers',
                    '20m'
                ],
                tablets=self.all_tablets)

            # Insert two rows for the later threads (critical read, update).
            with utils.vtgate.write_transaction(keyspace=KEYSPACE,
                                                shards=[SHARD],
                                                tablet_type='master') as tx:
                tx.execute('INSERT INTO buffer (id, msg) VALUES (:id, :msg)', {
                    'id': CRITICAL_READ_ROW_ID,
                    'msg': 'critical read'
                })
                tx.execute('INSERT INTO buffer (id, msg) VALUES (:id, :msg)', {
                    'id': UPDATE_ROW_ID,
                    'msg': 'update'
                })
        except:
            self.tearDown()
            raise
Esempio n. 54
0
def setUpModule():
  try:
    environment.topo_server().setup()

    setup_procs = [
        src_master.init_mysql(),
        src_replica.init_mysql(),
        src_rdonly1.init_mysql(),
        src_rdonly2.init_mysql(),
        dst_master.init_mysql(),
        dst_replica.init_mysql(),
        ]
    utils.Vtctld().start()
    utils.wait_procs(setup_procs)

    # Set up binlog stream from shard 0 to shard 1.
    # Modeled after initial_sharding.py.
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                     'keyspace_id', keyrange_constants.KIT_UINT64])

    src_master.init_tablet('master', 'test_keyspace', '0')
    src_replica.init_tablet('replica', 'test_keyspace', '0')
    src_rdonly1.init_tablet('rdonly', 'test_keyspace', '0')
    src_rdonly2.init_tablet('rdonly', 'test_keyspace', '0')

    utils.run_vtctl(['RebuildShardGraph', 'test_keyspace/0'])
    utils.validate_topology()

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    for t in [src_master, src_replica, src_rdonly1, src_rdonly2]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)

    for t in [src_master, src_replica, src_rdonly1, src_rdonly2]:
      t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/0',
                     src_master.tablet_alias], auto_log=True)

    # Create schema
    logging.debug("Creating schema...")
    create_table = '''create table test_table(
        id bigint auto_increment,
        keyspace_id bigint(20) unsigned,
        msg varchar(64),
        primary key (id),
        index by_msg (msg)
        ) Engine=InnoDB'''

    utils.run_vtctl(['ApplySchema',
                     '-sql=' + create_table,
                     'test_keyspace'], auto_log=True)

    # Create destination shard.
    dst_master.init_tablet('master', 'test_keyspace', '-')
    dst_replica.init_tablet('replica', 'test_keyspace', '-')
    dst_master.start_vttablet(wait_for_state='NOT_SERVING')
    dst_replica.start_vttablet(wait_for_state='NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/-',
                     dst_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # copy the schema
    utils.run_vtctl(['CopySchemaShard', src_replica.tablet_alias,
                     'test_keyspace/-'], auto_log=True)

    # run the clone worked (this is a degenerate case, source and destination
    # both have the full keyrange. Happens to work correctly).
    logging.debug("Running the clone worker to start binlog stream...")
    utils.run_vtworker(['--cell', 'test_nj',
                        'SplitClone',
                        '--strategy=-populate_blp_checkpoint',
                        '--source_reader_count', '10',
                        '--min_table_size_for_split', '1',
                        'test_keyspace/0'],
                        auto_log=True)
    dst_master.wait_for_binlog_player_count(1)

    # Wait for dst_replica to be ready.
    dst_replica.wait_for_binlog_server_state("Enabled")
  except:
    tearDownModule()
    raise
Esempio n. 55
0
def setUpModule():
    global new_init_db, db_credentials_file
    global tablet_master, tablet_replica1, tablet_replica2

    tablet_master = tablet.Tablet(use_mysqlctld=use_mysqlctld,
                                  vt_dba_passwd='VtDbaPass')
    tablet_replica1 = tablet.Tablet(use_mysqlctld=use_mysqlctld,
                                    vt_dba_passwd='VtDbaPass')
    tablet_replica2 = tablet.Tablet(use_mysqlctld=use_mysqlctld,
                                    vt_dba_passwd='VtDbaPass')

    try:
        environment.topo_server().setup()

        credentials = {
            'vt_dba': ['VtDbaPass'],
            'vt_app': ['VtAppPass'],
            'vt_allprivs': ['VtAllprivsPass'],
            'vt_repl': ['VtReplPass'],
            'vt_filtered': ['VtFilteredPass'],
        }
        db_credentials_file = environment.tmproot + '/db_credentials.json'
        with open(db_credentials_file, 'w') as fd:
            fd.write(json.dumps(credentials))

        # Determine which column is used for user passwords in this MySQL version.
        proc = tablet_master.init_mysql()
        if use_mysqlctld:
            tablet_master.wait_for_mysqlctl_socket()
        else:
            utils.wait_procs([proc])
        try:
            tablet_master.mquery('mysql',
                                 'select password from mysql.user limit 0',
                                 user='******')
            password_col = 'password'
        except MySQLdb.DatabaseError:
            password_col = 'authentication_string'
        utils.wait_procs([tablet_master.teardown_mysql()])
        tablet_master.remove_tree(ignore_options=True)

        # Create a new init_db.sql file that sets up passwords for all users.
        # Then we use a db-credentials-file with the passwords.
        new_init_db = environment.tmproot + '/init_db_with_passwords.sql'
        with open(environment.vttop + '/config/init_db.sql') as fd:
            init_db = fd.read()
        with open(new_init_db, 'w') as fd:
            fd.write(init_db)
            fd.write(mysql_flavor().change_passwords(password_col))

        # start mysql instance external to the test
        setup_procs = [
            tablet_master.init_mysql(
                init_db=new_init_db,
                extra_args=['-db-credentials-file', db_credentials_file]),
            tablet_replica1.init_mysql(
                init_db=new_init_db,
                extra_args=['-db-credentials-file', db_credentials_file]),
            tablet_replica2.init_mysql(
                init_db=new_init_db,
                extra_args=['-db-credentials-file', db_credentials_file]),
        ]
        if use_mysqlctld:
            tablet_master.wait_for_mysqlctl_socket()
            tablet_replica1.wait_for_mysqlctl_socket()
            tablet_replica2.wait_for_mysqlctl_socket()
        else:
            utils.wait_procs(setup_procs)
    except:
        tearDownModule()
        raise
Esempio n. 56
0
def setUpModule():
    global new_init_db, db_credentials_file

    try:
        credentials = {
            'vt_dba': ['VtDbaPass'],
            'vt_app': ['VtAppPass'],
            'vt_allprivs': ['VtAllprivsPass'],
            'vt_repl': ['VtReplPass'],
            'vt_filtered': ['VtFilteredPass'],
        }
        db_credentials_file = environment.tmproot + '/db_credentials.json'
        with open(db_credentials_file, 'w') as fd:
            fd.write(json.dumps(credentials))

        # Determine which column is used for user passwords in this MySQL version.
        proc = ks1_shard_master.init_mysql()
        utils.wait_procs([proc])
        try:
            ks1_shard_master.mquery('mysql',
                                    'select password from mysql.user limit 0',
                                    user='******')
            password_col = 'password'
        except MySQLdb.DatabaseError:
            password_col = 'authentication_string'
        utils.wait_procs([ks1_shard_master.teardown_mysql()])
        ks1_shard_master.remove_tree(ignore_options=True)

        # Create a new init_db.sql file that sets up passwords for all users.
        # Then we use a db-credentials-file with the passwords.
        new_init_db = environment.tmproot + '/init_db_with_passwords.sql'
        with open(environment.vttop + '/config/init_db.sql') as fd:
            init_db = fd.read()
        with open(new_init_db, 'w') as fd:
            fd.write(init_db)
            fd.write(mysql_flavor().change_passwords(password_col))
            fd.write('''

# connecting through a port requires 127.0.0.1
# --host=localhost will connect through socket
CREATE USER 'vt_dba'@'127.0.0.1' IDENTIFIED BY 'VtDbaPass';
GRANT ALL ON *.* TO 'vt_dba'@'127.0.0.1';
GRANT GRANT OPTION ON *.* TO 'vt_dba'@'127.0.0.1';

# User for app traffic, with global read-write access.
CREATE USER 'vt_app'@'127.0.0.1' IDENTIFIED BY 'VtAppPass';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,
  REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,
  LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,
  SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER
  ON *.* TO 'vt_app'@'127.0.0.1';

# User for administrative operations that need to be executed as non-SUPER.
# Same permissions as vt_app here.
CREATE USER 'vt_allprivs'@'127.0.0.1' IDENTIFIED BY 'VtAllPrivsPass';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,
  REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,
  LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,
  SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER
  ON *.* TO 'vt_allprivs'@'127.0.0.1';

# User for Vitess filtered replication (binlog player).
# Same permissions as vt_app.
CREATE USER 'vt_filtered'@'127.0.0.1' IDENTIFIED BY 'VtFilteredPass';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE,
  REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES,
  LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW,
  SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER
  ON *.* TO 'vt_filtered'@'127.0.0.1';

FLUSH PRIVILEGES;
''')
        setup_procs = [
            t.init_mysql(
                use_rbr=True,
                init_db=new_init_db,
                extra_args=['-db-credentials-file', db_credentials_file])
            for t in all_mysql_tablets
        ]
        utils.wait_procs(setup_procs)
        for i in range(0, len(all_other_tablets)):
            all_other_tablets[i].mysql_port = all_mysql_tablets[i].mysql_port

        environment.topo_server().setup()

    except:
        tearDownModule()
        raise
Esempio n. 57
0
    def verify_successful_worker_copy_with_reparent(self, mysql_down=False):
        """Verifies that vtworker can successfully copy data for a SplitClone.

    Order of operations:
    1. Run a background vtworker
    2. Wait until the worker successfully resolves the destination masters.
    3. Reparent the destination tablets
    4. Wait until the vtworker copy is finished
    5. Verify that the worker was forced to reresolve topology and retry writes
      due to the reparent.
    6. Verify that the data was copied successfully to both new shards

    Args:
      mysql_down: boolean. If True, we take down the MySQL instances on the
        destination masters at first, then bring them back and reparent away.

    Raises:
      AssertionError if things didn't go as expected.
    """
        if mysql_down:
            logging.debug('Shutting down mysqld on destination masters.')
            utils.wait_procs([
                shard_0_master.shutdown_mysql(),
                shard_1_master.shutdown_mysql()
            ])

        worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
            ['--cell', 'test_nj'], auto_log=True)

        # --max_tps is only specified to enable the throttler and ensure that the
        # code is executed. But the intent here is not to throttle the test, hence
        # the rate limit is set very high.
        # --chunk_count is 2 because rows are currently ordered by primary key such
        # that all rows of the first shard come first and then the second shard.
        # TODO(mberlin): Remove --offline=false once vtworker ensures that the
        #                destination shards are not behind the master's replication
        #                position.
        args = [
            'SplitClone', '--offline=false', '--destination_writer_count', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999'
        ]
        if not mysql_down:
            # Make the clone as slow as necessary such that there is enough time to
            # run PlannedReparent in the meantime.
            # TOOD(mberlin): Once insert_values is fixed to uniformly distribute the
            #                rows across shards when sorted by primary key, remove
            #                --chunk_count 2, --min_rows_per_chunk 1 and set
            #                --source_reader_count back to 1.
            args.extend([
                '--source_reader_count', '2', '--chunk_count', '2',
                '--min_rows_per_chunk', '1', '--write_query_max_rows', '1'
            ])
        args.append('test_keyspace/0')
        workerclient_proc = utils.run_vtworker_client_bg(args, worker_rpc_port)

        if mysql_down:
            # If MySQL is down, we wait until vtworker retried at least once to make
            # sure it reached the point where a write failed due to MySQL being down.
            # There should be two retries at least, one for each destination shard.
            utils.poll_for_vars(
                'vtworker',
                worker_port,
                'WorkerRetryCount >= 2',
                condition_fn=lambda v: v.get('WorkerRetryCount') >= 2)
            logging.debug(
                'Worker has retried at least twice, starting reparent now')

            # vtworker is blocked at this point. This is a good time to test that its
            # throttler server is reacting to RPCs.
            self.check_throttler_service(
                'localhost:%d' % worker_rpc_port,
                ['test_keyspace/-80', 'test_keyspace/80-'], 9999)

            # Bring back masters. Since we test with semi-sync now, we need at least
            # one replica for the new master. This test is already quite expensive,
            # so we bring back the old master as a replica rather than having a third
            # replica up the whole time.
            logging.debug('Restarting mysqld on destination masters')
            utils.wait_procs(
                [shard_0_master.start_mysql(),
                 shard_1_master.start_mysql()])

            # Reparent away from the old masters.
            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/-80',
                shard_0_replica.tablet_alias
            ],
                            auto_log=True)
            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/80-',
                shard_1_replica.tablet_alias
            ],
                            auto_log=True)

        else:
            # NOTE: There is a race condition around this:
            #   It's possible that the SplitClone vtworker command finishes before the
            #   PlannedReparentShard vtctl command, which we start below, succeeds.
            #   Then the test would fail because vtworker did not have to retry.
            #
            # To workaround this, the test takes a parameter to increase the number of
            # rows that the worker has to copy (with the idea being to slow the worker
            # down).
            # You should choose a value for num_insert_rows, such that this test
            # passes for your environment (trial-and-error...)
            # Make sure that vtworker got past the point where it picked a master
            # for each destination shard ("finding targets" state).
            utils.poll_for_vars(
                'vtworker',
                worker_port,
                'WorkerState == cloning the data (online)',
                condition_fn=lambda v: v.get('WorkerState') == 'cloning the'
                ' data (online)')
            logging.debug('Worker is in copy state, starting reparent now')

            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/-80',
                shard_0_replica.tablet_alias
            ],
                            auto_log=True)
            utils.run_vtctl([
                'PlannedReparentShard', 'test_keyspace/80-',
                shard_1_replica.tablet_alias
            ],
                            auto_log=True)

        utils.wait_procs([workerclient_proc])

        # Verify that we were forced to re-resolve and retry.
        worker_vars = utils.get_vars(worker_port)
        self.assertGreater(
            worker_vars['WorkerRetryCount'], 1,
            "expected vtworker to retry each of the two reparented"
            " destination masters at least once, but it didn't")
        self.assertNotEqual(worker_vars['WorkerRetryCount'], {},
                            "expected vtworker to retry, but it didn't")
        utils.kill_sub_process(worker_proc, soft=True)

        # Wait for the destination RDONLYs to catch up or the following offline
        # clone will try to insert rows which already exist.
        # TODO(mberlin): Remove this once SplitClone supports it natively.
        utils.wait_for_replication_pos(shard_0_replica, shard_0_rdonly1)
        utils.wait_for_replication_pos(shard_1_replica, shard_1_rdonly1)
        # Run final offline clone to enable filtered replication.
        _, _ = utils.run_vtworker([
            '-cell', 'test_nj', 'SplitClone', '--online=false',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0'
        ],
                                  auto_log=True)

        # Make sure that everything is caught up to the same replication point
        self.run_split_diff('test_keyspace/-80', all_shard_tablets,
                            shard_0_tablets)
        self.run_split_diff('test_keyspace/80-', all_shard_tablets,
                            shard_1_tablets)

        self.assert_shard_data_equal(0, shard_master, shard_0_tablets.replica)
        self.assert_shard_data_equal(1, shard_master, shard_1_tablets.replica)
Esempio n. 58
0
  def test_merge_sharding(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'custom_ksid_col',
                     '--sharding_column_type', base_sharding.keyspace_id_type,
                     'test_keyspace'])

    shard_0_master.init_tablet('replica', 'test_keyspace', '-40')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-40')
    shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40')
    shard_1_master.init_tablet('replica', 'test_keyspace', '40-80')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80')
    shard_2_master.init_tablet('replica', 'test_keyspace', '80-')
    shard_2_replica.init_tablet('replica', 'test_keyspace', '80-')
    shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    # rebuild and check SrvKeyspace
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

    # create databases so vttablet can start behaving normally
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None,
                       binlog_use_v3_resharding_mode=False)

    # won't be serving, no replication state
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-40',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/40-80',
                     shard_1_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                     shard_2_master.tablet_alias], auto_log=True)

    # create the tables
    self._create_schema()
    self._insert_startup_values()

    # run a health check on source replicas so they respond to discovery
    # (for binlog players) and on the source rdonlys (for workers)
    for t in [shard_0_replica, shard_1_replica]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
    for t in [shard_0_rdonly, shard_1_rdonly]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

    # create the merge shards
    shard_dest_master.init_tablet('replica', 'test_keyspace', '-80')
    shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')

    # start vttablet on the destination shard (no db created,
    # so they're all not serving)
    for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
      t.start_vttablet(wait_for_state=None,
                       binlog_use_v3_resharding_mode=False)
    for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                     shard_dest_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -40 40-80 80-\n'
        'Partitions(rdonly): -40 40-80 80-\n'
        'Partitions(replica): -40 40-80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    # copy the schema
    utils.run_vtctl(['CopySchemaShard', shard_0_rdonly.tablet_alias,
                     'test_keyspace/-80'], auto_log=True)

    # copy the data (will also start filtered replication), reset source
    # Run vtworker as daemon for the following SplitClone commands.
    worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
        ['--cell', 'test_nj', '--command_display_interval', '10ms',
          '--use_v3_resharding_mode=false'],
        auto_log=True)

    # Initial clone (online).
    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--offline=false',
         '--chunk_count', '10',
         '--min_rows_per_chunk', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/-80'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        2, 0, 0, 0)

    # Reset vtworker such that we can run the next command.
    workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
    utils.wait_procs([workerclient_proc])

    # Modify the destination shard. SplitClone will revert the changes.
    # Delete row 1 (provokes an insert).
    shard_dest_master.mquery('vt_test_keyspace',
                             'delete from resharding1 where id=1', write=True)
    # Update row 2 (provokes an update).
    shard_dest_master.mquery(
        'vt_test_keyspace', "update resharding1 set msg='msg-not-2' where id=2",
        write=True)
    # Insert row 0 (provokes a delete).
    self._insert_value(shard_dest_master, 'resharding1', 0, 'msg0',
                       0x5000000000000000)

    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--chunk_count', '10',
         '--min_rows_per_chunk', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/-80'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    # Change tablets, which were taken offline, back to rdonly.
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        1, 1, 1, 0)
    self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1',
                                        0, 0, 0, 2)
    # Terminate worker daemon because it is no longer needed.
    utils.kill_sub_process(worker_proc, soft=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check binlog player variables
    self.check_destination_master(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(shard_0_replica, horizontal=True)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True)

    # testing filtered replication: insert a bunch of data on shard 0 and 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shards')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 10)
    if v != 100:
      # small optimization: only do this check if we don't have all the data
      # already anyway.
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 30)
    self.check_binlog_player_vars(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(shard_0_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)

    # use vtworker to compare the data (after health-checking the destination
    # rdonly tablets so discovery works)
    utils.run_vtctl(['RunHealthCheck', shard_dest_rdonly.tablet_alias])
    logging.debug('Running vtworker SplitDiff on first half')
    utils.run_vtworker(['-cell', 'test_nj',
                        '--use_v3_resharding_mode=false',
                        'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        '--source_uid', '1',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    logging.debug('Running vtworker SplitDiff on second half')
    utils.run_vtworker(['-cell', 'test_nj',
                        '--use_v3_resharding_mode=false',
                        'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        '--source_uid', '2',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)

    # get status for the destination master tablet, make sure we have it all
    self.check_running_binlog_player(shard_dest_master, 3000, 1000)

    # check destination master query service is not running
    utils.check_tablet_query_service(self, shard_dest_master, False, False)
    stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                          '-count', '1',
                                          shard_dest_master.tablet_alias])
    logging.debug('Got health: %s', str(stream_health))
    self.assertIn('realtime_stats', stream_health)
    self.assertNotIn('serving', stream_health)

    # check the destination master 3 is healthy, even though its query
    # service is not running (if not healthy this would exception out)
    shard_dest_master.get_healthz()

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -40 40-80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # now serve replica from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # now serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_master, False, True)
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_dest_master)

    # kill the original tablets in the original shards
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                         shard_1_master, shard_1_replica, shard_1_rdonly])
    for t in [shard_0_replica, shard_0_rdonly,
              shard_1_replica, shard_1_rdonly]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    for t in [shard_0_master, shard_1_master]:
      utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                      auto_log=True)

    # delete the original shards
    utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True)
    utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_2_master, shard_2_replica, shard_2_rdonly,
                         shard_dest_master, shard_dest_replica,
                         shard_dest_rdonly])
Esempio n. 59
0
def setup():
    utils.zk_setup()

    utils.debug("Creating certificates")
    os.makedirs(cert_dir)

    # Create CA certificate
    ca_key = cert_dir + "/ca-key.pem"
    ca_cert = cert_dir + "/ca-cert.pem"
    openssl(["genrsa", "-out", cert_dir + "/ca-key.pem"])
    ca_config = cert_dir + "/ca.config"
    with open(ca_config, 'w') as fd:
        fd.write("""
[ req ]
 default_bits           = 1024
 default_keyfile        = keyfile.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
 prompt                 = no
 output_password        = mypass
[ req_distinguished_name ]
 C                      = US
 ST                     = California
 L                      = Mountain View
 O                      = Google
 OU                     = Vitess
 CN                     = Mysql CA
 emailAddress           = [email protected]
[ req_attributes ]
 challengePassword      = A challenge password
""")
    openssl([
        "req", "-new", "-x509", "-nodes", "-days", "3600", "-batch", "-config",
        ca_config, "-key", ca_key, "-out", ca_cert
    ])

    # Create mysql server certificate, remove passphrase, and sign it
    server_key = cert_dir + "/server-key.pem"
    server_cert = cert_dir + "/server-cert.pem"
    server_req = cert_dir + "/server-req.pem"
    server_config = cert_dir + "/server.config"
    with open(server_config, 'w') as fd:
        fd.write("""
[ req ]
 default_bits           = 1024
 default_keyfile        = keyfile.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
 prompt                 = no
 output_password        = mypass
[ req_distinguished_name ]
 C                      = US
 ST                     = California
 L                      = Mountain View
 O                      = Google
 OU                     = Vitess
 CN                     = Mysql Server
 emailAddress           = [email protected]
[ req_attributes ]
 challengePassword      = A challenge password
""")
    openssl([
        "req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch",
        "-config", server_config, "-keyout", server_key, "-out", server_req
    ])
    openssl(["rsa", "-in", server_key, "-out", server_key])
    openssl([
        "x509", "-req", "-in", server_req, "-days", "3600", "-CA", ca_cert,
        "-CAkey", ca_key, "-set_serial", "01", "-out", server_cert
    ])

    # Create mysql client certificate, remove passphrase, and sign it
    client_key = cert_dir + "/client-key.pem"
    client_cert = cert_dir + "/client-cert.pem"
    client_req = cert_dir + "/client-req.pem"
    client_config = cert_dir + "/client.config"
    with open(client_config, 'w') as fd:
        fd.write("""
[ req ]
 default_bits           = 1024
 default_keyfile        = keyfile.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
 prompt                 = no
 output_password        = mypass
[ req_distinguished_name ]
 C                      = US
 ST                     = California
 L                      = Mountain View
 O                      = Google
 OU                     = Vitess
 CN                     = Mysql Client
 emailAddress           = [email protected]
[ req_attributes ]
 challengePassword      = A challenge password
""")
    openssl([
        "req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch",
        "-config", client_config, "-keyout", client_key, "-out", client_req
    ])
    openssl(["rsa", "-in", client_key, "-out", client_key])
    openssl([
        "x509", "-req", "-in", client_req, "-days", "3600", "-CA", ca_cert,
        "-CAkey", ca_key, "-set_serial", "02", "-out", client_cert
    ])

    # Create vt server certificate, remove passphrase, and sign it
    vt_server_key = cert_dir + "/vt-server-key.pem"
    vt_server_cert = cert_dir + "/vt-server-cert.pem"
    vt_server_req = cert_dir + "/vt-server-req.pem"
    openssl([
        "req", "-newkey", "rsa:2048", "-days", "3600", "-nodes", "-batch",
        "-keyout", vt_server_key, "-out", vt_server_req
    ])
    openssl(["rsa", "-in", vt_server_key, "-out", vt_server_key])
    openssl([
        "x509", "-req", "-in", vt_server_req, "-days", "3600", "-CA", ca_cert,
        "-CAkey", ca_key, "-set_serial", "03", "-out", vt_server_cert
    ])

    extra_my_cnf = cert_dir + "/secure.cnf"
    fd = open(extra_my_cnf, "w")
    fd.write("ssl-ca=" + ca_cert + "\n")
    fd.write("ssl-cert=" + server_cert + "\n")
    fd.write("ssl-key=" + server_key + "\n")
    fd.close()

    setup_procs = [
        shard_0_master.init_mysql(extra_my_cnf=extra_my_cnf),
        shard_0_slave.init_mysql(extra_my_cnf=extra_my_cnf),
    ]
    utils.wait_procs(setup_procs)

    utils.run_vtctl('CreateKeyspace test_keyspace')

    shard_0_master.init_tablet('master', 'test_keyspace', '0')
    shard_0_slave.init_tablet('replica', 'test_keyspace', '0')

    utils.run_vtctl('RebuildShardGraph test_keyspace/0', auto_log=True)

    utils.run_vtctl('RebuildKeyspaceGraph test_keyspace', auto_log=True)

    # create databases so vttablet can start behaving normally
    shard_0_master.create_db('vt_test_keyspace')
    shard_0_slave.create_db('vt_test_keyspace')
Esempio n. 60
0
    def test_resharding(self):
        # we're going to reparent and swap these two
        global shard_2_master, shard_2_replica1

        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'bad_column',
            '--sharding_column_type', 'bytes', 'test_keyspace'
        ])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col',
            'uint64'
        ],
                        expect_fail=True)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', 'test_keyspace',
            'custom_ksid_col', base_sharding.keyspace_id_type
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
        shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        ks = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
        self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = (
            base_sharding.keyspace_id_type == keyrange_constants.KIT_BYTES)

        # create databases so vttablet can start behaving somewhat normally
        for t in [
                shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly1
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             full_mycnf_args=full_mycnf_args)

        # wait for the tablets (replication is not setup, the slaves won't be
        # healthy)
        shard_0_master.wait_for_vttablet_state('SERVING')
        shard_0_replica.wait_for_vttablet_state('NOT_SERVING')
        shard_0_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
        shard_1_master.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')
        shard_1_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
        shard_1_rdonly1.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # check the shards
        shards = utils.run_vtctl_json(
            ['FindAllShardsInKeyspace', 'test_keyspace'])
        self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards))
        self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards))
        self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards))

        # create the tables
        self._create_schema()
        self._insert_startup_values()
        self._test_keyrange_constraints()

        # run a health check on source replicas so they respond to discovery
        # (for binlog players) and on the source rdonlys (for workers)
        for t in [shard_0_replica, shard_1_slave1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
        for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

        # create the split shards
        shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
        shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0')
        shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0')
        shard_2_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-c0')
        shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
        shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-')
        shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_2_master.start_vttablet(wait_for_state=None)
        shard_3_master.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_replica1, shard_2_replica2, shard_2_rdonly1,
                shard_3_replica, shard_3_rdonly1
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_2_rdonly1, shard_3_master, shard_3_replica,
                shard_3_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-c0',
            shard_2_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/c0-', shard_3_master.tablet_alias
        ],
                        auto_log=True)

        # check the shards
        shards = utils.run_vtctl_json(
            ['FindAllShardsInKeyspace', 'test_keyspace'])
        for s in ['-80', '80-', '80-c0', 'c0-']:
            self.assertIn(s, shards, 'unexpected shards: %s' % str(shards))
        self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards))

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # disable shard_1_slave2, so we're sure filtered replication will go
        # from shard_1_slave1
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')

        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_1_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)

        # Run vtworker as daemon for the following SplitClone commands.
        worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
            ['--cell', 'test_nj', '--command_display_interval', '10ms'],
            auto_log=True)

        # Copy the data from the source to the destination shards.
        # --max_tps is only specified to enable the throttler and ensure that the
        # code is executed. But the intent here is not to throttle the test, hence
        # the rate limit is set very high.
        #
        # Initial clone (online).
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 2, 0, 0, 0)

        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Test the correct handling of keyspace_id changes which happen after
        # the first clone.
        # Let row 2 go to shard 3 instead of shard 2.
        shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set'
                              ' custom_ksid_col=0xD000000000000000 WHERE id=2',
                              write=True)
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        # Row 2 will be deleted from shard 2 and inserted to shard 3.
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 1, 0, 1, 1)
        self._check_value(shard_2_master,
                          'resharding1',
                          2,
                          'msg2',
                          0xD000000000000000,
                          should_be_here=False)
        self._check_value(shard_3_master, 'resharding1', 2, 'msg2',
                          0xD000000000000000)
        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Move row 2 back to shard 2 from shard 3 by changing the keyspace_id again.
        shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set'
                              ' custom_ksid_col=0x9000000000000000 WHERE id=2',
                              write=True)
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        # Row 2 will be deleted from shard 3 and inserted to shard 2.
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 1, 0, 1, 1)
        self._check_value(shard_2_master, 'resharding1', 2, 'msg2',
                          0x9000000000000000)
        self._check_value(shard_3_master,
                          'resharding1',
                          2,
                          'msg2',
                          0x9000000000000000,
                          should_be_here=False)
        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Modify the destination shard. SplitClone will revert the changes.
        # Delete row 2 (provokes an insert).
        shard_2_master.mquery('vt_test_keyspace',
                              'delete from resharding1 where id=2',
                              write=True)
        # Update row 3 (provokes an update).
        shard_3_master.mquery(
            'vt_test_keyspace',
            "update resharding1 set msg='msg-not-3' where id=3",
            write=True)
        # Insert row 4 and 5 (provokes a delete).
        self._insert_value(shard_3_master, 'resharding1', 4, 'msg4',
                           0xD000000000000000)
        self._insert_value(shard_3_master, 'resharding1', 5, 'msg5',
                           0xD000000000000000)

        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--exclude_tables', 'unrelated', '--chunk_count',
            '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets',
            '1', '--max_tps', '9999', 'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        # Change tablet, which was taken offline, back to rdonly.
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 1, 1, 2, 0)
        self.verify_reconciliation_counters(worker_port, 'Offline',
                                            'resharding1', 0, 0, 0, 2)
        # Terminate worker daemon because it is no longer needed.
        utils.kill_sub_process(worker_proc, soft=True)

        # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl([
            'ValidateSchemaKeyspace', '--exclude_tables=unrelated',
            'test_keyspace'
        ],
                        auto_log=True)

        # check the binlog players are running and exporting vars
        self.check_destination_master(shard_2_master, ['test_keyspace/80-'])
        self.check_destination_master(shard_3_master, ['test_keyspace/80-'])

        # check that binlog server exported the stats vars
        self.check_binlog_server_vars(shard_1_slave1, horizontal=True)

        # Check that the throttler was enabled.
        self.check_throttler_service(shard_2_master.rpc_endpoint(),
                                     ['BinlogPlayer/0'], 9999)
        self.check_throttler_service(shard_3_master.rpc_endpoint(),
                                     ['BinlogPlayer/0'], 9999)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 5)
        if v != 100:
            # small optimization: only do this check if we don't have all the data
            # already anyway.
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 20)
        logging.debug('Checking no data was sent the wrong way')
        self._check_lots_not_present(1000)
        self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'],
                                      seconds_behind_master_max=30)
        self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'],
                                      seconds_behind_master_max=30)
        self.check_binlog_server_vars(shard_1_slave1,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)

        # use vtworker to compare the data (after health-checking the destination
        # rdonly tablets so discovery works)
        utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias])
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause('Good time to test vtworker for diffs')

        # get status for destination master tablets, make sure we have it all
        self.check_running_binlog_player(shard_2_master, 4000, 2000)
        self.check_running_binlog_player(shard_3_master, 4000, 2000)

        # start a thread to insert data into shard_1 in the background
        # with current time, and monitor the delay
        insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 1, 10000,
                                       0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 2, 10001,
                                       0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low', 1)
        monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high', 2)

        # tests a failover switching serving to a different replica
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
        utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias])

        # test data goes through again
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000, base=1000)
        logging.debug('Checking 80 percent of data was sent quickly')
        self._check_lots_timeout(1000, 80, 5, base=1000)
        self.check_binlog_server_vars(shard_1_slave2,
                                      horizontal=True,
                                      min_statements=800,
                                      min_transactions=800)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        expect_fail=True)

        # check query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even health check that is enabled on
        # master 3 should not interfere (we run it to be sure).
        utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias],
                        auto_log=True)
        for master in [shard_2_master, shard_3_master]:
            utils.check_tablet_query_service(self, master, False, False)
            stream_health = utils.run_vtctl_json(
                ['VtTabletStreamHealth', '-count', '1', master.tablet_alias])
            logging.debug('Got health: %s', str(stream_health))
            self.assertIn('realtime_stats', stream_health)
            self.assertNotIn('serving', stream_health)

        # check the destination master 3 is healthy, even though its query
        # service is not running (if not healthy this would exception out)
        shard_3_master.get_healthz()

        # now serve rdonly from the split shards, in test_nj only
        utils.run_vtctl([
            'MigrateServedTypes', '--cells=test_nj', 'test_keyspace/80-',
            'rdonly'
        ],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_srv_keyspace(
            'test_ny',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # now serve rdonly from the split shards, everywhere
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_srv_keyspace(
            'test_ny',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # then serve replica from the split shards
        destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-c0 c0-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
            auto_log=True)
        # After a backwards migration, queryservice should be enabled on
        # source and disabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, True, False)
        # Destination tablets would have query service disabled for other
        # reasons than the migration, so check the shard record instead of
        # the tablets directly.
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, False)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)
        # Destination tablets would have query service disabled for other
        # reasons than the migration, so check the shard record instead of
        # the tablets directly
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-c0 c0-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl([
            'PlannedReparentShard', 'test_keyspace/80-c0',
            shard_2_replica1.tablet_alias
        ])

        # update our test variables to point at the new master
        shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master

        logging.debug(
            'Inserting lots of data on source shard after reparenting')
        self._insert_lots(3000, base=2000)
        logging.debug('Checking 80 percent of data was sent fairly quickly')
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use vtworker to compare the data again
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug(
            'DELAY 1: %s max_lag=%d ms avg_lag=%d ms',
            monitor_thread_1.thread_name, monitor_thread_1.max_lag_ms,
            monitor_thread_1.lag_sum_ms / monitor_thread_1.sample_count)
        logging.debug(
            'DELAY 2: %s max_lag=%d ms avg_lag=%d ms',
            monitor_thread_2.thread_name, monitor_thread_2.max_lag_ms,
            monitor_thread_2.lag_sum_ms / monitor_thread_2.sample_count)

        # mock with the SourceShard records to test 'vtctl SourceShardDelete'
        # and 'vtctl SourceShardAdd'
        utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                        auto_log=True)
        utils.run_vtctl([
            'SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-', '0',
            'test_keyspace/80-'
        ],
                        auto_log=True)

        # then serve master from the split shards, make sure the source master's
        # query service is now turned off
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-c0 c0-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-c0 c0-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_1_master, False, True)

        # check the binlog players are gone now
        self.check_no_binlog_player(shard_2_master)
        self.check_no_binlog_player(shard_3_master)

        # delete the original tablets in the original shard
        tablet.kill_tablets([
            shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
            shard_1_rdonly1
        ])
        for t in [
                shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
                shard_1_rdonly1
        ]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        utils.run_vtctl(
            ['DeleteTablet', '-allow_master', shard_1_master.tablet_alias],
            auto_log=True)

        # rebuild the serving graph, all mentions of the old shards shoud be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                        auto_log=True,
                        expect_fail=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                        auto_log=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                        auto_log=True)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
        self.assertNotIn('cells', shard)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

        # kill everything
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_2_master,
            shard_2_replica1, shard_2_replica2, shard_2_rdonly1,
            shard_3_master, shard_3_replica, shard_3_rdonly1
        ])