Example #1
def setup_sharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', SHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', SHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])
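  # one master, one replica, and one rdonly tablet per shard (-80 and 80-)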
  shard_0_master.init_tablet('master', keyspace=SHARDED_KEYSPACE, shard='-80')
  shard_0_replica.init_tablet('replica', keyspace=SHARDED_KEYSPACE, shard='-80')
  shard_0_rdonly.init_tablet('rdonly', keyspace=SHARDED_KEYSPACE, shard='-80')
  shard_1_master.init_tablet('master', keyspace=SHARDED_KEYSPACE, shard='80-')
  shard_1_replica.init_tablet('replica', keyspace=SHARDED_KEYSPACE, shard='80-')
  shard_1_rdonly.init_tablet('rdonly', keyspace=SHARDED_KEYSPACE, shard='80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', SHARDED_KEYSPACE], auto_log=True)

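  # create the per-tablet database and the test table, then start vttablet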
  for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
            shard_1_master, shard_1_replica, shard_1_rdonly]:
    t.create_db('vt_test_keyspace_sharded')
    t.mquery(shard_0_master.dbname, create_vt_insert_test)
    t.start_vttablet(wait_for_state=None)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')

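  # make the designated tablet the master of each shard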
  utils.run_vtctl(['ReparentShard', '-force', '%s/-80' % SHARDED_KEYSPACE,
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ReparentShard', '-force', '%s/80-' % SHARDED_KEYSPACE,
                   shard_1_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', SHARDED_KEYSPACE],
                   auto_log=True)

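  # verify the serving graph covers the full keyrange for every tablet type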
  utils.check_srv_keyspace('test_nj', SHARDED_KEYSPACE,
                           'Partitions(master): -80 80-\n' +
                           'Partitions(rdonly): -80 80-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,rdonly,replica')
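
A note on cleanup: none of the setup functions in these examples has a matching teardown. The sketch below is illustrative and not part of the original suite; it assumes the same utils and tablet helper modules plus the six shard_* globals from Example #1, and reuses only vtctl commands that appear in Example #26 (DeleteTablet, -allow_master, DeleteShard). The name teardown_sharded_keyspace is hypothetical.

def teardown_sharded_keyspace():
  # stop all vttablet processes first
  tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                       shard_1_master, shard_1_replica, shard_1_rdonly])
  # remove the non-master tablets from the topology
  for t in [shard_0_replica, shard_0_rdonly, shard_1_replica, shard_1_rdonly]:
    utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
  # masters require the -allow_master flag (mirroring Example #26)
  for t in [shard_0_master, shard_1_master]:
    utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                    auto_log=True)
  # drop both shards now that their tablets are gone
  utils.run_vtctl(['DeleteShard', '%s/-80' % SHARDED_KEYSPACE], auto_log=True)
  utils.run_vtctl(['DeleteShard', '%s/80-' % SHARDED_KEYSPACE], auto_log=True)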
Example #2
def setup_unsharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', UNSHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', UNSHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])
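  # unsharded: a single shard '0' covers the entire keyrange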
  unsharded_master.init_tablet('master', keyspace=UNSHARDED_KEYSPACE, shard='0')
  unsharded_replica.init_tablet('replica', keyspace=UNSHARDED_KEYSPACE, shard='0')
  unsharded_rdonly.init_tablet('rdonly', keyspace=UNSHARDED_KEYSPACE, shard='0')

  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE], auto_log=True)

  for t in [unsharded_master, unsharded_replica, unsharded_rdonly]:
    t.create_db('vt_test_keyspace_unsharded')
    t.mquery(unsharded_master.dbname, create_vt_insert_test)
    t.start_vttablet(wait_for_state=None)

  for t in [unsharded_master, unsharded_replica, unsharded_rdonly]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['ReparentShard', '-force', '%s/0' % UNSHARDED_KEYSPACE,
                   unsharded_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE],
                   auto_log=True)

  utils.check_srv_keyspace('test_nj', UNSHARDED_KEYSPACE,
                           'Partitions(master): -\n' +
                           'Partitions(rdonly): -\n' +
                           'Partitions(replica): -\n' +
                           'TabletTypes: master,rdonly,replica')
Example #3
def setup_sharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', SHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', SHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])
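  # tablets start via init_* flags and stay NOT_SERVING until a master exists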
  shard_0_master.start_vttablet(
      wait_for_state=None, target_tablet_type='replica',
      init_keyspace=SHARDED_KEYSPACE, init_shard='-80')
  shard_0_replica.start_vttablet(
      wait_for_state=None, target_tablet_type='replica',
      init_keyspace=SHARDED_KEYSPACE, init_shard='-80')
  shard_1_master.start_vttablet(
      wait_for_state=None, target_tablet_type='replica',
      init_keyspace=SHARDED_KEYSPACE, init_shard='80-')
  shard_1_replica.start_vttablet(
      wait_for_state=None, target_tablet_type='replica',
      init_keyspace=SHARDED_KEYSPACE, init_shard='80-')

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('NOT_SERVING')

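  # elect a master in each shard, then apply the test schema through vtctl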
  utils.run_vtctl(['InitShardMaster', '-force', '%s/-80' % SHARDED_KEYSPACE,
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', '-force', '%s/80-' % SHARDED_KEYSPACE,
                   shard_1_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ApplySchema', '-sql', create_vt_insert_test,
                   SHARDED_KEYSPACE])

  utils.check_srv_keyspace('test_nj', SHARDED_KEYSPACE,
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -80 80-\n')
Example #4
def setup_sharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', SHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', SHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])
  shard_0_master.init_tablet('master', keyspace=SHARDED_KEYSPACE, shard='-80')
  shard_0_replica.init_tablet(
      'replica', keyspace=SHARDED_KEYSPACE, shard='-80')
  shard_1_master.init_tablet('master', keyspace=SHARDED_KEYSPACE, shard='80-')
  shard_1_replica.init_tablet(
      'replica', keyspace=SHARDED_KEYSPACE, shard='80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', SHARDED_KEYSPACE], auto_log=True)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.create_db('vt_test_keyspace_sharded')
    t.mquery(shard_0_master.dbname, create_vt_insert_test)
    t.start_vttablet(wait_for_state=None)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['InitShardMaster', '%s/-80' % SHARDED_KEYSPACE,
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', '%s/80-' % SHARDED_KEYSPACE,
                   shard_1_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', SHARDED_KEYSPACE],
                  auto_log=True)

  utils.check_srv_keyspace('test_nj', SHARDED_KEYSPACE,
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -80 80-\n')
Example #5
def setup_unsharded_keyspace():
    utils.run_vtctl(["CreateKeyspace", UNSHARDED_KEYSPACE])
    utils.run_vtctl(["SetKeyspaceShardingInfo", "-force", UNSHARDED_KEYSPACE, "keyspace_id", "uint64"])
    unsharded_master.init_tablet("master", keyspace=UNSHARDED_KEYSPACE, shard="0")
    unsharded_replica.init_tablet("replica", keyspace=UNSHARDED_KEYSPACE, shard="0")

    utils.run_vtctl(["RebuildKeyspaceGraph", UNSHARDED_KEYSPACE], auto_log=True)

    for t in [unsharded_master, unsharded_replica]:
        t.create_db("vt_test_keyspace_unsharded")
        t.mquery(unsharded_master.dbname, create_vt_insert_test)
        t.start_vttablet(wait_for_state=None)

    for t in [unsharded_master, unsharded_replica]:
        t.wait_for_vttablet_state("SERVING")

    utils.run_vtctl(["InitShardMaster", "%s/0" % UNSHARDED_KEYSPACE, unsharded_master.tablet_alias], auto_log=True)

    utils.run_vtctl(["RebuildKeyspaceGraph", UNSHARDED_KEYSPACE], auto_log=True)

    utils.check_srv_keyspace(
        "test_nj",
        UNSHARDED_KEYSPACE,
        "Partitions(master): -\n" + "Partitions(rdonly): -\n" + "Partitions(replica): -\n",
    )
Example #6
def setup_sharded_keyspace():
    utils.run_vtctl(['CreateKeyspace', SHARDED_KEYSPACE])
    utils.run_vtctl([
        'SetKeyspaceShardingInfo', '-force', SHARDED_KEYSPACE, 'keyspace_id',
        'uint64'
    ])

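    # every tablet is inited as a replica; InitShardMaster below promotes one per shard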
    shard_0_master.init_tablet('replica',
                               keyspace=SHARDED_KEYSPACE,
                               shard='-80',
                               tablet_index=0)
    shard_0_replica.init_tablet('replica',
                                keyspace=SHARDED_KEYSPACE,
                                shard='-80',
                                tablet_index=1)
    shard_1_master.init_tablet('replica',
                               keyspace=SHARDED_KEYSPACE,
                               shard='80-',
                               tablet_index=0)
    shard_1_replica.init_tablet('replica',
                                keyspace=SHARDED_KEYSPACE,
                                shard='80-',
                                tablet_index=1)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.create_db('vt_test_keyspace_sharded')
        t.mquery(shard_0_master.dbname, create_vt_insert_test)
        t.start_vttablet(wait_for_state=None)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl([
        'InitShardMaster', '-force',
        '%s/-80' % SHARDED_KEYSPACE, shard_0_master.tablet_alias
    ],
                    auto_log=True)
    utils.run_vtctl([
        'InitShardMaster', '-force',
        '%s/80-' % SHARDED_KEYSPACE, shard_1_master.tablet_alias
    ],
                    auto_log=True)

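    # the remaining tablets become healthy replicas and start serving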
    for t in [shard_0_replica, shard_1_replica]:
        utils.wait_for_tablet_type(t.tablet_alias, 'replica')
    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.wait_for_vttablet_state('SERVING')

    # rebuild to be sure we have the latest data
    utils.run_vtctl(['RebuildKeyspaceGraph', SHARDED_KEYSPACE], auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', SHARDED_KEYSPACE, 'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n')
Example #7
def start_tablets():
  global __tablets
  # start tablets
  for t in __tablets:
    t.start_vttablet(wait_for_state=None)

  # wait for them to come in serving state
  for t in __tablets:
    t.wait_for_vttablet_state('SERVING')

  # InitShardMaster for master tablets
  for t in __tablets:
    if t.tablet_type == 'master':
      utils.run_vtctl(['InitShardMaster', t.keyspace+'/'+t.shard,
                       t.tablet_alias], auto_log=True)

  for ks in topo_schema.keyspaces:
    ks_name = ks[0]
    ks_type = ks[1]
    utils.run_vtctl(['RebuildKeyspaceGraph', ks_name],
                     auto_log=True)
    if ks_type == shard_constants.RANGE_SHARDED:
      utils.check_srv_keyspace('test_nj', ks_name,
                               'Partitions(master): -80 80-\n'
                               'Partitions(rdonly): -80 80-\n'
                               'Partitions(replica): -80 80-\n')
Example #8
  def verify(self):
    self.assert_shard_data_equal(0, worker.shard_master,
                                 worker.shard_0_tablets.replica)
    self.assert_shard_data_equal(1, worker.shard_master,
                                 worker.shard_1_tablets.replica)

    # Verify effect of MigrateServedTypes. Dest shards are serving now.
    utils.check_srv_keyspace('test_nj', self.KEYSPACE,
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n')

    # source shard: query service must be disabled after MigrateServedTypes.
    source_shards = [worker.shard_rdonly1,
                     worker.shard_replica,
                     worker.shard_master]
    for shard in source_shards:
      utils.check_tablet_query_service(
          self, shard, serving=False, tablet_control_disabled=True)

    # dest shard -80, 80-: query service must be enabled
    # after MigrateServedTypes.
    dest_shards = [worker.shard_0_rdonly1,
                   worker.shard_0_replica,
                   worker.shard_0_master,
                   worker.shard_1_rdonly1,
                   worker.shard_1_replica,
                   worker.shard_1_master]
    for shard in dest_shards:
      utils.check_tablet_query_service(
          self,
          shard,
          serving=True,
          tablet_control_disabled=False)
Example #9
    def verify(self):
        self.assert_shard_data_equal(0, worker.shard_master,
                                     worker.shard_0_tablets.replica)
        self.assert_shard_data_equal(1, worker.shard_master,
                                     worker.shard_1_tablets.replica)

        # Verify effect of MigrateServedTypes. Dest shards are serving now.
        utils.check_srv_keyspace(
            'test_nj', self.KEYSPACE, 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n')

        # source shard: query service must be disabled after MigrateServedTypes.
        source_shards = [
            worker.shard_rdonly1, worker.shard_replica, worker.shard_master
        ]
        for shard in source_shards:
            utils.check_tablet_query_service(self,
                                             shard,
                                             serving=False,
                                             tablet_control_disabled=True)

        # dest shard -80, 80-: query service must be enabled
        # after MigrateServedTypes.
        dest_shards = [
            worker.shard_0_rdonly1, worker.shard_0_replica,
            worker.shard_0_master, worker.shard_1_rdonly1,
            worker.shard_1_replica, worker.shard_1_master
        ]
        for shard in dest_shards:
            utils.check_tablet_query_service(self,
                                             shard,
                                             serving=True,
                                             tablet_control_disabled=False)
Example #10
def start_tablets():
  global __tablets
  # start tablets
  for t in __tablets:
    t.start_vttablet(wait_for_state=None)

  # wait for them to come in serving state
  for t in __tablets:
    t.wait_for_vttablet_state('SERVING')

  # ReparentShard for master tablets
  for t in __tablets:
    if t.tablet_type == 'master':
      utils.run_vtctl(['ReparentShard', '-force', t.keyspace+'/'+t.shard,
                       t.tablet_alias], auto_log=True)

  for ks in topo_schema.keyspaces:
    ks_name = ks[0]
    ks_type = ks[1]
    utils.run_vtctl(['RebuildKeyspaceGraph', ks_name],
                     auto_log=True)
    if ks_type == shard_constants.RANGE_SHARDED:
      utils.check_srv_keyspace('test_nj', ks_name,
                               'Partitions(master): -80 80-\n' +
                               'Partitions(replica): -80 80-\n' +
                               'TabletTypes: master,replica')
Example #11
def setup_unsharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', UNSHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', UNSHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])
  unsharded_master.init_tablet(
      'master', keyspace=UNSHARDED_KEYSPACE, shard='0')
  unsharded_replica.init_tablet(
      'replica', keyspace=UNSHARDED_KEYSPACE, shard='0')

  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE], auto_log=True)

  for t in [unsharded_master, unsharded_replica]:
    t.create_db('vt_test_keyspace_unsharded')
    t.mquery(unsharded_master.dbname, create_vt_insert_test)
    t.start_vttablet(wait_for_state=None)

  for t in [unsharded_master, unsharded_replica]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['InitShardMaster', '%s/0' % UNSHARDED_KEYSPACE,
                   unsharded_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE],
                  auto_log=True)

  utils.check_srv_keyspace('test_nj', UNSHARDED_KEYSPACE,
                           'Partitions(master): -\n'
                           'Partitions(rdonly): -\n'
                           'Partitions(replica): -\n')
Example #12
def setup_tablets():
    global vtgate_server
    global vtgate_port

    # Start up a master mysql and vttablet
    logging.debug("Setting up tablets")
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    utils.run_vtctl([
        'SetKeyspaceShardingInfo', '-force', 'test_keyspace', 'keyspace_id',
        'uint64'
    ])
    shard_0_master.init_tablet('master', keyspace='test_keyspace', shard='-80')
    shard_0_replica.init_tablet('replica',
                                keyspace='test_keyspace',
                                shard='-80')
    shard_1_master.init_tablet('master', keyspace='test_keyspace', shard='80-')
    shard_1_replica.init_tablet('replica',
                                keyspace='test_keyspace',
                                shard='80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.create_db('vt_test_keyspace')
        t.mquery(shard_0_master.dbname, create_vt_insert_test)
        t.mquery(shard_0_master.dbname, create_vt_a)
        t.start_vttablet(wait_for_state=None)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl([
        'ReparentShard', '-force', 'test_keyspace/-80',
        shard_0_master.tablet_alias
    ],
                    auto_log=True)
    utils.run_vtctl([
        'ReparentShard', '-force', 'test_keyspace/80-',
        shard_1_master.tablet_alias
    ],
                    auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n' +
        'Partitions(replica): -80 80-\n' + 'TabletTypes: master,replica')

    vtgate_server, vtgate_port = utils.vtgate_start()
    vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                          "test_nj", 30.0)
    topology.read_topology(vtgate_client)
Example #13
def setup_tablets():
    global vtgate_server
    global vtgate_port

    # Start up a master mysql and vttablet
    logging.debug("Setting up tablets")
    utils.run_vtctl(['CreateKeyspace', KEYSPACE_NAME])
    utils.run_vtctl([
        'SetKeyspaceShardingInfo', '-force', KEYSPACE_NAME, 'keyspace_id',
        'uint64'
    ])
    shard_0_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='-80')
    shard_0_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='-80')
    shard_1_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='80-')
    shard_1_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.create_db('vt_test_keyspace')
        for create_table in create_tables:
            t.mquery(shard_0_master.dbname, create_table)
        t.start_vttablet(wait_for_state=None)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl([
        'ReparentShard', '-force', KEYSPACE_NAME + '/-80',
        shard_0_master.tablet_alias
    ],
                    auto_log=True)
    utils.run_vtctl([
        'ReparentShard', '-force', KEYSPACE_NAME + '/80-',
        shard_1_master.tablet_alias
    ],
                    auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

    utils.check_srv_keyspace(
        'test_nj', KEYSPACE_NAME, 'Partitions(master): -80 80-\n' +
        'Partitions(replica): -80 80-\n' + 'TabletTypes: master,replica')

    vtgate_server, vtgate_port = utils.vtgate_start()
    # FIXME(shrutip): this should be removed once vtgate_cursor's
    # dependency on topology goes away.
    vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                          "test_nj", 30.0)
    topology.read_topology(vtgate_client)
Example #14
    def verify(self):
        self.assert_shard_data_equal(0, worker.shard_master, worker.shard_0_tablets.replica)
        self.assert_shard_data_equal(1, worker.shard_master, worker.shard_1_tablets.replica)

        # Verify effect of MigrateServedTypes. Dest shards are serving now.
        utils.check_srv_keyspace(
            "test_nj",
            self.KEYSPACE,
            "Partitions(master): -80 80-\n" "Partitions(rdonly): -80 80-\n" "Partitions(replica): -80 80-\n",
        )

        # Check that query service is disabled (source shard) or enabled (dest).

        # The 'rdonly' tablet requires an explicit healthcheck first because
        # the following sequence of events is happening in this test:
        # - SplitDiff returns 'rdonly' as 'spare' tablet (NOT_SERVING)
        # - MigrateServedTypes runs and does not refresh the 'spare' tablet
        #   (still NOT_SERVING)
        #   Shard_TabletControl.DisableQueryService=true will be set in the topology
        # - explicit or periodic healthcheck runs:
        #   a) tablet seen as caught up, change type from 'spare' to 'rdonly'
        #      (change to SERVING)
        #   b) post-action callback agent.refreshTablet() reads the topology
        #      and finds out that DisableQueryService=true is set.
        #      (change to NOT_SERVING)
        #
        # We must run an explicit healthcheck or we can see one of the two states:
        # - NOT_SERVING, DisableQueryService=false, tablet type 'spare'
        #   (immediately after SplitDiff returned)
        # - SERVING, DisableQueryService=false, tablet type 'rdonly'
        #   (during healthcheck before post-action callback is called)
        utils.run_vtctl(["RunHealthCheck", worker.shard_rdonly1.tablet_alias], auto_log=True)

        # source shard: query service must be disabled after MigrateServedTypes.
        utils.check_tablet_query_service(self, worker.shard_rdonly1, serving=False, tablet_control_disabled=True)
        utils.check_tablet_query_service(self, worker.shard_replica, serving=False, tablet_control_disabled=True)
        utils.check_tablet_query_service(self, worker.shard_master, serving=False, tablet_control_disabled=True)

        # dest shard -80: query service must be enabled after MigrateServedTypes.
        # Run explicit healthcheck because 'rdonly' tablet may still be 'spare'.
        utils.run_vtctl(["RunHealthCheck", worker.shard_0_rdonly1.tablet_alias], auto_log=True)
        utils.check_tablet_query_service(self, worker.shard_0_rdonly1, serving=True, tablet_control_disabled=False)
        utils.check_tablet_query_service(self, worker.shard_0_replica, serving=True, tablet_control_disabled=False)
        utils.check_tablet_query_service(self, worker.shard_0_master, serving=True, tablet_control_disabled=False)

        # dest shard 80-: query service must be enabled after MigrateServedTypes.
        # Run explicit healthcheck because 'rdonly' tablet is still 'spare'.
        utils.run_vtctl(["RunHealthCheck", worker.shard_1_rdonly1.tablet_alias], auto_log=True)
        utils.check_tablet_query_service(self, worker.shard_1_rdonly1, serving=True, tablet_control_disabled=False)
        utils.check_tablet_query_service(self, worker.shard_1_replica, serving=True, tablet_control_disabled=False)
        utils.check_tablet_query_service(self, worker.shard_1_master, serving=True, tablet_control_disabled=False)
Example #15
def setup_tablets():
    # Start up a master mysql and vttablet
    logging.debug('Setting up tablets')
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    utils.run_vtctl([
        'SetKeyspaceShardingInfo', '-force', 'test_keyspace', 'keyspace_id',
        'uint64'
    ])
    shard_0_master.init_tablet('master', keyspace='test_keyspace', shard='-80')
    shard_0_replica.init_tablet('replica',
                                keyspace='test_keyspace',
                                shard='-80')
    shard_1_master.init_tablet('master', keyspace='test_keyspace', shard='80-')
    shard_1_replica.init_tablet('replica',
                                keyspace='test_keyspace',
                                shard='80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.create_db('vt_test_keyspace')
        t.mquery(shard_0_master.dbname, create_vt_insert_test)
        t.mquery(shard_0_master.dbname, create_vt_a)
        t.start_vttablet(wait_for_state=None)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl(
        ['InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias],
        auto_log=True)
    utils.run_vtctl(
        ['InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias],
        auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace', 'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n')

    utils.VtGate().start()
    vtgate_client = zkocc.ZkOccConnection(utils.vtgate.addr(), 'test_nj', 30.0)
    topology.read_topology(vtgate_client)
Example #16
def setup_tablets():
    global vtgate_server
    global vtgate_port

    # Start up a master mysql and vttablet
    logging.debug("Setting up tablets")
    utils.run_vtctl(['CreateKeyspace', KEYSPACE_NAME])
    utils.run_vtctl([
        'SetKeyspaceShardingInfo', '-force', KEYSPACE_NAME, 'keyspace_id',
        'uint64'
    ])
    shard_0_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='-80')
    shard_0_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='-80')
    shard_1_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='80-')
    shard_1_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.create_db('vt_test_keyspace')
        for create_table in create_tables:
            t.mquery(shard_0_master.dbname, create_table)
        t.start_vttablet(wait_for_state=None)

    for t in [
            shard_0_master, shard_0_replica, shard_1_master, shard_1_replica
    ]:
        t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl([
        'ReparentShard', '-force', KEYSPACE_NAME + '/-80',
        shard_0_master.tablet_alias
    ],
                    auto_log=True)
    utils.run_vtctl([
        'ReparentShard', '-force', KEYSPACE_NAME + '/80-',
        shard_1_master.tablet_alias
    ],
                    auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

    utils.check_srv_keyspace(
        'test_nj', KEYSPACE_NAME, 'Partitions(master): -80 80-\n' +
        'Partitions(replica): -80 80-\n' + 'TabletTypes: master,replica')

    vtgate_server, vtgate_port = utils.vtgate_start()
Example #17
def setup_tablets():
  # Start up a master mysql and vttablet
  logging.debug('Setting up tablets')
  utils.run_vtctl(['CreateKeyspace', KEYSPACE_NAME])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', KEYSPACE_NAME,
                   'keyspace_id', 'uint64'])
  shard_0_master.init_tablet(
      'master',
      keyspace=KEYSPACE_NAME,
      shard='0',
      tablet_index=0)
  shard_0_replica1.init_tablet(
      'replica',
      keyspace=KEYSPACE_NAME,
      shard='0',
      tablet_index=1)

  utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

  for t in [shard_0_master, shard_0_replica1]:
    t.create_db('vt_test_keyspace')
    for create_table in create_tables:
      t.mquery(shard_0_master.dbname, create_table)
    t.start_vttablet(wait_for_state=None, target_tablet_type='replica')

  for t in [shard_0_master]:
    t.wait_for_vttablet_state('SERVING')
  for t in [shard_0_replica1]:
    t.wait_for_vttablet_state('NOT_SERVING')

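  # promote the master; the replica starts serving once it is healthy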
  utils.run_vtctl(['InitShardMaster', KEYSPACE_NAME+'/0',
                   shard_0_master.tablet_alias], auto_log=True)

  for t in [shard_0_replica1]:
    utils.wait_for_tablet_type(t.tablet_alias, 'replica')

  for t in [shard_0_master, shard_0_replica1]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(
      ['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

  utils.check_srv_keyspace(
      'test_nj', KEYSPACE_NAME,
      'Partitions(master): -\n'
      'Partitions(rdonly): -\n'
      'Partitions(replica): -\n')
Example #18
def setup_tablets():
    # Start up a master mysql and vttablet
    logging.debug('Setting up tablets')
    utils.run_vtctl(['CreateKeyspace', KEYSPACE_NAME])
    utils.run_vtctl([
        'SetKeyspaceShardingInfo', '-force', KEYSPACE_NAME, 'keyspace_id',
        'uint64'
    ])
    shard_0_master.init_tablet('master',
                               keyspace=KEYSPACE_NAME,
                               shard='0',
                               tablet_index=0)
    shard_0_replica1.init_tablet('replica',
                                 keyspace=KEYSPACE_NAME,
                                 shard='0',
                                 tablet_index=1)

    utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

    for t in [shard_0_master, shard_0_replica1]:
        t.create_db('vt_test_keyspace')
        for create_table in create_tables:
            t.mquery(shard_0_master.dbname, create_table)
        t.start_vttablet(wait_for_state=None, target_tablet_type='replica')

    for t in [shard_0_master]:
        t.wait_for_vttablet_state('SERVING')
    for t in [shard_0_replica1]:
        t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(
        ['InitShardMaster', KEYSPACE_NAME + '/0', shard_0_master.tablet_alias],
        auto_log=True)

    for t in [shard_0_replica1]:
        utils.wait_for_tablet_type(t.tablet_alias, 'replica')

    for t in [shard_0_master, shard_0_replica1]:
        t.wait_for_vttablet_state('SERVING')

    utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

    utils.check_srv_keyspace(
        'test_nj', KEYSPACE_NAME, 'Partitions(master): -\n'
        'Partitions(rdonly): -\n'
        'Partitions(replica): -\n')
Example #19
def setup_tablets():
  global vtgate_server
  global vtgate_port

  # Start up a master mysql and vttablet
  logging.debug("Setting up tablets")
  utils.run_vtctl(['CreateKeyspace', KEYSPACE_NAME])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', KEYSPACE_NAME,
                   'keyspace_id', 'uint64'])
  shard_0_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='-80')
  shard_0_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='-80')
  shard_1_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='80-')
  shard_1_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.create_db('vt_test_keyspace')
    for create_table in create_tables:
      t.mquery(shard_0_master.dbname, create_table)
    t.start_vttablet(wait_for_state=None)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['ReparentShard', '-force', KEYSPACE_NAME+'/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ReparentShard', '-force', KEYSPACE_NAME+'/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME],
                   auto_log=True)

  utils.check_srv_keyspace('test_nj', KEYSPACE_NAME,
                           'Partitions(master): -80 80-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,replica')

  vtgate_server, vtgate_port = utils.vtgate_start()
  # FIXME(shrutip): this should be removed once vtgate_cursor's
  # dependency on topology goes away.
  vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                        "test_nj", 30.0)
  topology.read_topology(vtgate_client)
Example #20
def setup_tablets():
  global vtgate_server
  global vtgate_port

  # Start up a master mysql and vttablet
  logging.debug("Setting up tablets")
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                   'keyspace_id', 'uint64'])
  shard_0_master.init_tablet('master', keyspace='test_keyspace', shard='-80')
  shard_0_replica.init_tablet('replica', keyspace='test_keyspace', shard='-80')
  shard_1_master.init_tablet('master', keyspace='test_keyspace', shard='80-')
  shard_1_replica.init_tablet('replica', keyspace='test_keyspace', shard='80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.create_db('vt_test_keyspace')
    t.mquery(shard_0_master.dbname, create_vt_insert_test)
    t.mquery(shard_0_master.dbname, create_vt_a)
    t.start_vttablet(wait_for_state=None)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                   auto_log=True)

  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,replica')

  vtgate_server, vtgate_port = utils.vtgate_start()
  vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port,
                                        "test_nj", 30.0)
  topology.read_topology(vtgate_client)
Example #21
def setup_tablets():
  # Start up a master mysql and vttablet
  logging.debug('Setting up tablets')
  utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                   'keyspace_id', 'uint64'])
  shard_0_master.init_tablet('master', keyspace='test_keyspace', shard='-80')
  shard_0_replica.init_tablet('replica', keyspace='test_keyspace', shard='-80')
  shard_1_master.init_tablet('master', keyspace='test_keyspace', shard='80-')
  shard_1_replica.init_tablet('replica', keyspace='test_keyspace', shard='80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.create_db('vt_test_keyspace')
    t.mquery(shard_0_master.dbname, create_vt_insert_test)
    t.mquery(shard_0_master.dbname, create_vt_a)
    t.start_vttablet(wait_for_state=None)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(
      ['InitShardMaster', 'test_keyspace/-80',
       shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(
      ['InitShardMaster', 'test_keyspace/80-',
       shard_1_master.tablet_alias], auto_log=True)

  utils.run_vtctl(
      ['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  utils.check_srv_keyspace('test_nj', 'test_keyspace',
                           'Partitions(master): -80 80-\n'
                           'Partitions(rdonly): -80 80-\n'
                           'Partitions(replica): -80 80-\n')

  utils.VtGate().start()
  vtgate_client = zkocc.ZkOccConnection(utils.vtgate.addr(), 'test_nj', 30.0)
  topology.read_topology(vtgate_client)
Example #22
def setup_unsharded_keyspace():
  utils.run_vtctl(['CreateKeyspace', UNSHARDED_KEYSPACE])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', UNSHARDED_KEYSPACE,
                   'keyspace_id', 'uint64'])

  unsharded_master.init_tablet(
      'master',
      keyspace=UNSHARDED_KEYSPACE,
      shard='0',
      tablet_index=0)
  unsharded_replica.init_tablet(
      'replica',
      keyspace=UNSHARDED_KEYSPACE,
      shard='0',
      tablet_index=1)

  for t in [unsharded_master, unsharded_replica]:
    t.create_db('vt_test_keyspace_unsharded')
    t.mquery(unsharded_master.dbname, create_vt_insert_test)
    t.start_vttablet(wait_for_state=None)

  for t in [unsharded_master]:
    t.wait_for_vttablet_state('SERVING')
  for t in [unsharded_replica]:
    t.wait_for_vttablet_state('NOT_SERVING')

  utils.run_vtctl(['InitShardMaster', '-force', '%s/0' % UNSHARDED_KEYSPACE,
                   unsharded_master.tablet_alias], auto_log=True)

  for t in [unsharded_replica]:
    utils.wait_for_tablet_type(t.tablet_alias, 'replica')
  for t in [unsharded_master, unsharded_replica]:
    t.wait_for_vttablet_state('SERVING')

  # rebuild to be sure we have the right version
  utils.run_vtctl(['RebuildKeyspaceGraph', UNSHARDED_KEYSPACE], auto_log=True)
  utils.check_srv_keyspace('test_nj', UNSHARDED_KEYSPACE,
                           'Partitions(master): -\n'
                           'Partitions(rdonly): -\n'
                           'Partitions(replica): -\n')
Example #23
def setup_tablets():
  global vtgate_server
  global vtgate_port

  # Start up a master mysql and vttablet
  logging.debug("Setting up tablets")
  utils.run_vtctl(['CreateKeyspace', KEYSPACE_NAME])
  utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', KEYSPACE_NAME,
                   'keyspace_id', 'uint64'])
  shard_0_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='-80')
  shard_0_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='-80')
  shard_1_master.init_tablet('master', keyspace=KEYSPACE_NAME, shard='80-')
  shard_1_replica.init_tablet('replica', keyspace=KEYSPACE_NAME, shard='80-')

  utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME], auto_log=True)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.create_db('vt_test_keyspace')
    for create_table in create_tables:
      t.mquery(shard_0_master.dbname, create_table)
    t.start_vttablet(wait_for_state=None)

  for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
    t.wait_for_vttablet_state('SERVING')

  utils.run_vtctl(['ReparentShard', '-force', KEYSPACE_NAME+'/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['ReparentShard', '-force', KEYSPACE_NAME+'/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  utils.run_vtctl(['RebuildKeyspaceGraph', KEYSPACE_NAME],
                   auto_log=True)

  utils.check_srv_keyspace('test_nj', KEYSPACE_NAME,
                           'Partitions(master): -80 80-\n' +
                           'Partitions(replica): -80 80-\n' +
                           'TabletTypes: master,replica')

  vtgate_server, vtgate_port = utils.vtgate_start()
Example #24
def setup_tablets():
    global vtgate_server
    global vtgate_port

    # Start up a master mysql and vttablet
    logging.debug("Setting up tablets")
    utils.run_vtctl(["CreateKeyspace", "test_keyspace"])
    utils.run_vtctl(["SetKeyspaceShardingInfo", "-force", "test_keyspace", "keyspace_id", "uint64"])
    shard_0_master.init_tablet("master", keyspace="test_keyspace", shard="-80")
    shard_0_replica.init_tablet("replica", keyspace="test_keyspace", shard="-80")
    shard_1_master.init_tablet("master", keyspace="test_keyspace", shard="80-")
    shard_1_replica.init_tablet("replica", keyspace="test_keyspace", shard="80-")

    utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)

    for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
        t.create_db("vt_test_keyspace")
        t.mquery(shard_0_master.dbname, create_vt_insert_test)
        t.mquery(shard_0_master.dbname, create_vt_a)
        t.start_vttablet(wait_for_state=None)

    for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
        t.wait_for_vttablet_state("SERVING")

    utils.run_vtctl(["ReparentShard", "-force", "test_keyspace/-80", shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(["ReparentShard", "-force", "test_keyspace/80-", shard_1_master.tablet_alias], auto_log=True)

    utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)

    utils.check_srv_keyspace(
        "test_nj",
        "test_keyspace",
        "Partitions(master): -80 80-\n" + "Partitions(replica): -80 80-\n" + "TabletTypes: master,replica",
    )

    vtgate_server, vtgate_port = utils.vtgate_start()
    vtgate_client = zkocc.ZkOccConnection("localhost:%u" % vtgate_port, "test_nj", 30.0)
    topology.read_topology(vtgate_client)
Example #25
def setup_tablets():
    global vtgate_server
    global vtgate_port

    # Start up a master mysql and vttablet
    logging.debug("Setting up tablets")
    utils.run_vtctl(["CreateKeyspace", KEYSPACE_NAME])
    utils.run_vtctl(["SetKeyspaceShardingInfo", "-force", KEYSPACE_NAME, "keyspace_id", "uint64"])
    shard_0_master.init_tablet("master", keyspace=KEYSPACE_NAME, shard="-80")
    shard_0_replica.init_tablet("replica", keyspace=KEYSPACE_NAME, shard="-80")
    shard_1_master.init_tablet("master", keyspace=KEYSPACE_NAME, shard="80-")
    shard_1_replica.init_tablet("replica", keyspace=KEYSPACE_NAME, shard="80-")

    utils.run_vtctl(["RebuildKeyspaceGraph", KEYSPACE_NAME], auto_log=True)

    for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
        t.create_db("vt_test_keyspace")
        for create_table in create_tables:
            t.mquery(shard_0_master.dbname, create_table)
        t.start_vttablet(wait_for_state=None)

    for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_replica]:
        t.wait_for_vttablet_state("SERVING")

    utils.run_vtctl(["ReparentShard", "-force", KEYSPACE_NAME + "/-80", shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(["ReparentShard", "-force", KEYSPACE_NAME + "/80-", shard_1_master.tablet_alias], auto_log=True)

    utils.run_vtctl(["RebuildKeyspaceGraph", KEYSPACE_NAME], auto_log=True)

    utils.check_srv_keyspace(
        "test_nj",
        KEYSPACE_NAME,
        "Partitions(master): -80 80-\n" + "Partitions(replica): -80 80-\n" + "TabletTypes: master,replica",
    )

    vtgate_server, vtgate_port = utils.vtgate_start()
Example #26
    def test_merge_sharding(self):
        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'custom_sharding_key',
            '--sharding_column_type', keyspace_id_type, 'test_keyspace'
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-40')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-40')
        shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40')
        shard_1_master.init_tablet('master', 'test_keyspace', '40-80')
        shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80')
        shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80')
        shard_2_master.init_tablet('master', 'test_keyspace', '80-')
        shard_2_replica.init_tablet('replica', 'test_keyspace', '80-')
        shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

        # rebuild and check SrvKeyspace
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        ks = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
        self.assertEqual(ks['sharding_column_name'], 'custom_sharding_key')

        # create databases so vttablet can start behaving normally
        for t in [
                shard_0_master, shard_0_replica, shard_0_rdonly,
                shard_1_master, shard_1_replica, shard_1_rdonly,
                shard_2_master, shard_2_replica, shard_2_rdonly
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None)

        # masters will be serving
        for t in [shard_0_master, shard_1_master, shard_2_master]:
            t.wait_for_vttablet_state('SERVING')

        # slaves won't be, no replication state
        for t in [
                shard_0_replica, shard_0_rdonly, shard_1_replica,
                shard_1_rdonly, shard_2_replica, shard_2_rdonly
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/-40', shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/40-80',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-', shard_2_master.tablet_alias
        ],
                        auto_log=True)

        # create the tables
        self._create_schema()
        self._insert_startup_values()

        # run a health check on source replicas so they respond to discovery
        # (for binlog players) and on the source rdonlys (for workers)
        for t in [shard_0_replica, shard_1_replica]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
        for t in [shard_0_rdonly, shard_1_rdonly]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

        # create the merge shards
        shard_dest_master.init_tablet('master', 'test_keyspace', '-80')
        shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')

        # start vttablet on the destination shard (no db created,
        # so they're all not serving)
        for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
            t.start_vttablet(wait_for_state=None)
        for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/-80',
            shard_dest_master.tablet_alias
        ],
                        auto_log=True)

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -40 40-80 80-\n'
                                 'Partitions(rdonly): -40 40-80 80-\n'
                                 'Partitions(replica): -40 40-80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_sharding_key')

        # copy the schema
        utils.run_vtctl([
            'CopySchemaShard', shard_0_rdonly.tablet_alias, 'test_keyspace/-80'
        ],
                        auto_log=True)

        # copy the data (will also start filtered replication), reset source
        utils.run_vtworker([
            '--cell', 'test_nj', '--command_display_interval', '10ms',
            'SplitClone', '--source_reader_count', '10',
            '--min_table_size_for_split', '1', '--min_healthy_rdonly_tablets',
            '1', 'test_keyspace/-80'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                        auto_log=True)

        # check binlog player variables
        self.check_destination_master(
            shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80'])

        # check that binlog server exported the stats vars
        self.check_binlog_server_vars(shard_0_replica, horizontal=True)
        self.check_binlog_server_vars(shard_1_replica, horizontal=True)

        # testing filtered replication: insert a bunch of data on shard 0 and 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shards')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 10)
        if v != 100:
            # small optimization: only do this check if we don't have all the data
            # already anyway.
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 30)
        self.check_binlog_player_vars(
            shard_dest_master, ['test_keyspace/-40', 'test_keyspace/40-80'],
            seconds_behind_master_max=30)
        self.check_binlog_server_vars(shard_0_replica,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)
        self.check_binlog_server_vars(shard_1_replica,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)

        # use vtworker to compare the data (after health-checking the destination
        # rdonly tablets so discovery works)
        utils.run_vtctl(['RunHealthCheck', shard_dest_rdonly.tablet_alias])
        logging.debug('Running vtworker SplitDiff on first half')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', '--source_uid', '0',
            'test_keyspace/-80'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        logging.debug('Running vtworker SplitDiff on second half')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', '--source_uid', '1',
            'test_keyspace/-80'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_dest_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        # get status for the destination master tablet, make sure we have it all
        self.check_running_binlog_player(shard_dest_master, 3000, 1000)

        # check destination master query service is not running
        utils.check_tablet_query_service(self, shard_dest_master, False, False)
        stream_health = utils.run_vtctl_json([
            'VtTabletStreamHealth', '-count', '1',
            shard_dest_master.tablet_alias
        ])
        logging.debug('Got health: %s', str(stream_health))
        self.assertIn('realtime_stats', stream_health)
        self.assertNotIn('serving', stream_health)

        # check the destination master is healthy, even though its query
        # service is not running (if not healthy this would exception out)
        shard_dest_master.get_healthz()

        # now serve rdonly from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -40 40-80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -40 40-80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_sharding_key')

        # now serve replica from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -40 40-80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_sharding_key')

        # now serve master from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_sharding_key')
        utils.check_tablet_query_service(self, shard_0_master, False, True)
        utils.check_tablet_query_service(self, shard_1_master, False, True)

        # check the binlog players are gone now
        self.check_no_binlog_player(shard_dest_master)

        # kill the original tablets in the original shards
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master,
            shard_1_replica, shard_1_rdonly
        ])
        for t in [
                shard_0_replica, shard_0_rdonly, shard_1_replica,
                shard_1_rdonly
        ]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        for t in [shard_0_master, shard_1_master]:
            utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                            auto_log=True)

        # delete the original shards
        utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True)
        utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # kill everything else
        tablet.kill_tablets([
            shard_2_master, shard_2_replica, shard_2_rdonly, shard_dest_master,
            shard_dest_replica, shard_dest_rdonly
        ])
Example #27
  def test_merge_sharding(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'custom_ksid_col',
                     '--sharding_column_type', base_sharding.keyspace_id_type,
                     'test_keyspace'])

    shard_0_master.init_tablet('replica', 'test_keyspace', '-40')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-40')
    shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40')
    shard_1_master.init_tablet('replica', 'test_keyspace', '40-80')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80')
    shard_2_master.init_tablet('replica', 'test_keyspace', '80-')
    shard_2_replica.init_tablet('replica', 'test_keyspace', '80-')
    shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    # rebuild and check SrvKeyspace
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

    # create databases so vttablet can start behaving normally
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None,
                       binlog_use_v3_resharding_mode=False)

    # won't be serving, no replication state
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-40',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/40-80',
                     shard_1_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                     shard_2_master.tablet_alias], auto_log=True)

    # create the tables
    self._create_schema()
    self._insert_startup_values()

    # run a health check on source replicas so they respond to discovery
    # (for binlog players) and on the source rdonlys (for workers)
    for t in [shard_0_replica, shard_1_replica]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
    for t in [shard_0_rdonly, shard_1_rdonly]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

    # create the merge shards
    shard_dest_master.init_tablet('replica', 'test_keyspace', '-80')
    shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')

    # start vttablet on the destination shard (no db created,
    # so they're all not serving)
    for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
      t.start_vttablet(wait_for_state=None,
                       binlog_use_v3_resharding_mode=False)
    for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                     shard_dest_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -40 40-80 80-\n'
        'Partitions(rdonly): -40 40-80 80-\n'
        'Partitions(replica): -40 40-80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')
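
    # check_srv_keyspace compares the cell's SrvKeyspace record against the
    # expected string above: one 'Partitions(<type>)' line per tablet type,
    # listing the keyranges currently serving that type. At this point all
    # three types are still served by the source shards -40, 40-80 and 80-.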

    # copy the schema
    utils.run_vtctl(['CopySchemaShard', shard_0_rdonly.tablet_alias,
                     'test_keyspace/-80'], auto_log=True)

    # copy the data (will also start filtered replication), reset source
    # Run vtworker as daemon for the following SplitClone commands.
    worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
        ['--cell', 'test_nj', '--command_display_interval', '10ms',
          '--use_v3_resharding_mode=false'],
        auto_log=True)

    # Initial clone (online).
    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--offline=false',
         '--chunk_count', '10',
         '--min_rows_per_chunk', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/-80'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        2, 0, 0, 0)

    # Reset vtworker such that we can run the next command.
    workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
    utils.wait_procs([workerclient_proc])

    # Modify the destination shard. SplitClone will revert the changes.
    # Delete row 1 (provokes an insert).
    shard_dest_master.mquery('vt_test_keyspace',
                             'delete from resharding1 where id=1', write=True)
    # Update row 2 (provokes an update).
    shard_dest_master.mquery(
        'vt_test_keyspace', "update resharding1 set msg='msg-not-2' where id=2",
        write=True)
    # Insert row 0 (provokes a delete).
    self._insert_value(shard_dest_master, 'resharding1', 0, 'msg0',
                       0x5000000000000000)
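
    # The second SplitClone run below reverts these three changes: it
    # re-inserts row 1, restores row 2 and deletes the extra row 0. Assuming
    # the counters are reported in (inserts, updates, deletes, equal rows)
    # order, that matches the Online counters (1, 1, 1, 0); the Offline phase
    # then finds the remaining two startup rows already identical (0, 0, 0, 2).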

    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--chunk_count', '10',
         '--min_rows_per_chunk', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/-80'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    # Change tablets, which were taken offline, back to rdonly.
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        1, 1, 1, 0)
    self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1',
                                        0, 0, 0, 2)
    # Terminate worker daemon because it is no longer needed.
    utils.kill_sub_process(worker_proc, soft=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check binlog player variables
    self.check_destination_master(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(shard_0_replica, horizontal=True)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True)

    # testing filtered replication: insert a bunch of data on shard 0 and 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shards')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 10)
    if v != 100:
      # small optimization: only do this check if we don't have all the data
      # already anyway.
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 30)
    self.check_binlog_player_vars(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(shard_0_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)

    # use vtworker to compare the data (after health-checking the destination
    # rdonly tablets so discovery works)
    utils.run_vtctl(['RunHealthCheck', shard_dest_rdonly.tablet_alias])
    logging.debug('Running vtworker SplitDiff on first half')
    utils.run_vtworker(['-cell', 'test_nj',
                        '--use_v3_resharding_mode=false',
                        'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        '--source_uid', '0',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    logging.debug('Running vtworker SplitDiff on second half')
    utils.run_vtworker(['-cell', 'test_nj',
                        '--use_v3_resharding_mode=false',
                        'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        '--source_uid', '1',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)

    # get status for the destination master tablet, make sure we have it all
    self.check_running_binlog_player(shard_dest_master, 3000, 1000)

    # check destination master query service is not running
    utils.check_tablet_query_service(self, shard_dest_master, False, False)
    stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                          '-count', '1',
                                          shard_dest_master.tablet_alias])
    logging.debug('Got health: %s', str(stream_health))
    self.assertIn('realtime_stats', stream_health)
    self.assertNotIn('serving', stream_health)
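    # 'serving' is absent rather than False, most likely because proto3 JSON
    # encoding omits fields that are at their default value.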

    # check the destination master is healthy, even though its query
    # service is not running (if it were not healthy, this call would raise)
    shard_dest_master.get_healthz()
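
    # The served types are migrated in order below: rdonly first, then
    # replica, then master. Migrating master is the final cut-over; it also
    # disables the query service on the source masters, which the
    # check_tablet_query_service calls afterwards verify.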

    # now serve rdonly from the destination shard
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -40 40-80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # now serve replica from the destination shard
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # now serve master from the destination shard
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_master, False, True)
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_dest_master)

    # kill the original tablets in the original shards
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                         shard_1_master, shard_1_replica, shard_1_rdonly])
    for t in [shard_0_replica, shard_0_rdonly,
              shard_1_replica, shard_1_rdonly]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    for t in [shard_0_master, shard_1_master]:
      utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                      auto_log=True)

    # delete the original shards
    utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True)
    utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_2_master, shard_2_replica, shard_2_rdonly,
                         shard_dest_master, shard_dest_replica,
                         shard_dest_rdonly])
Example #28
0
    def test_resharding(self):
        # we're going to reparent and swap these two
        global shard_2_master, shard_2_replica1

        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'bad_column',
            '--sharding_column_type', 'bytes', '--split_shard_count', '2',
            'test_keyspace'
        ])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'keyspace_id', 'uint64'
        ],
                        expect_fail=True)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', '-split_shard_count', '4',
            'test_keyspace', 'keyspace_id', keyspace_id_type
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
        shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
        shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        ks = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
        self.assertEqual(ks['split_shard_count'], 4)

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

        # create databases so vttablet can start behaving normally
        for t in [
                shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly1
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             full_mycnf_args=full_mycnf_args)

        # wait for the tablets
        shard_0_master.wait_for_vttablet_state('SERVING')
        shard_0_replica.wait_for_vttablet_state('SERVING')
        shard_0_ny_rdonly.wait_for_vttablet_state('SERVING')
        shard_1_master.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('SERVING')
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
        shard_1_ny_rdonly.wait_for_vttablet_state('SERVING')
        shard_1_rdonly1.wait_for_vttablet_state('SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # create the tables
        self._create_schema()
        self._insert_startup_values()
        self._test_keyrange_constraints()

        # run a health check on source replicas so they respond to discovery
        # (for binlog players) and on the source rdonlys (for workers)
        for t in [shard_0_replica, shard_1_slave1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'replica'])
        for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])

        # create the split shards
        shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
        shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-c0')
        shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-c0')
        shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
        shard_3_replica.init_tablet('spare', 'test_keyspace', 'c0-')
        shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')
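
        # Shard names are hex keyrange bounds over the keyspace_id space:
        # splitting '80-' at its midpoint 0xc0 yields the children '80-c0'
        # and 'c0-'. A rough sketch of how a keyspace_id inside the parent
        # '80-' maps to a child (illustrative only, not part of the test):
        #
        #   def shard_for_ksid(ksid):  # ksid as a 64-bit integer
        #       top = ksid >> 56
        #       return '80-c0' if 0x80 <= top < 0xc0 else 'c0-'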

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_3_master.start_vttablet(wait_for_state=None,
                                      target_tablet_type='replica')
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_replica, shard_3_rdonly1
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_master, shard_3_replica, shard_3_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-c0',
            shard_2_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/c0-', shard_3_master.tablet_alias
        ],
                        auto_log=True)

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)

        # the worker will do everything. We test with source_reader_count=10
        # (down from the default of 20) as the connection pool is not big
        # enough for 20. min_table_size_for_split is set to 1 so as to force
        # a split even on the small table we have.
        # We need to create the schema first; the worker will then do the
        # data copying.
        for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_1_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)

        utils.run_vtworker([
            '--cell', 'test_nj', '--command_display_interval', '10ms',
            'SplitClone', '--exclude_tables', 'unrelated',
            '--strategy=-populate_blp_checkpoint', '--source_reader_count',
            '10', '--min_table_size_for_split', '1', 'test_keyspace/80-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
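
        # vtworker takes the source rdonly tablet offline while it copies
        # (changing its tablet type), so we change it back to rdonly here;
        # the same restore step follows every SplitDiff run below.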

        # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl([
            'ValidateSchemaKeyspace', '--exclude_tables=unrelated',
            'test_keyspace'
        ],
                        auto_log=True)

        # check the binlog players are running and exporting vars
        shard_2_master.wait_for_binlog_player_count(1)
        shard_3_master.wait_for_binlog_player_count(1)
        self._check_binlog_player_vars(shard_2_master)
        self._check_binlog_player_vars(shard_3_master)

        # check that binlog server exported the stats vars
        self._check_binlog_server_vars(shard_1_slave1)

        self._check_stream_health_equals_binlog_player_vars(shard_2_master)
        self._check_stream_health_equals_binlog_player_vars(shard_3_master)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 5)
        if v != 100:
            # small optimization: only do this check if we don't have all the data
            # already anyway.
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 20)
        logging.debug('Checking no data was sent the wrong way')
        self._check_lots_not_present(1000)
        self._check_binlog_player_vars(shard_2_master,
                                       seconds_behind_master_max=30)
        self._check_binlog_player_vars(shard_3_master,
                                       seconds_behind_master_max=30)

        # use vtworker to compare the data (after health-checking the destination
        # rdonly tablets so discovery works)
        utils.run_vtctl(
            ['RunHealthCheck', shard_3_rdonly1.tablet_alias, 'rdonly'])
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause('Good time to test vtworker for diffs')

        # get status for a destination master tablet, make sure we have it all
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('Binlog player state: Running', shard_2_master_status)
        self.assertIn(
            '<td><b>All</b>: 6000<br><b>Query</b>: 4000<br>'
            '<b>Transaction</b>: 2000<br></td>', shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # start a thread to insert data into shard_1 in the background
        # with the current time, and monitor the lag
        insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 10000,
                                       0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 10001,
                                       0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low')
        monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high')
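
        # The two keyspace_ids pin each stream to one destination shard:
        # 0x90... falls in [0x80, 0xc0) and lands on 80-c0 (monitored via
        # shard_2_replica2), while 0xd0... falls in [0xc0, 0x100) and lands
        # on c0- (monitored via shard_3_replica).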

        # tests a failover switching serving to a different replica
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
        utils.run_vtctl(
            ['RunHealthCheck', shard_1_slave2.tablet_alias, 'replica'])

        # test data goes through again
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000, base=1000)
        logging.debug('Checking 80 percent of data was sent quickly')
        self._check_lots_timeout(1000, 80, 5, base=1000)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        expect_fail=True)

        # check the query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even the health check that is enabled on
        # master 3 should not interfere (we run it to be sure).
        utils.run_vtctl(
            ['RunHealthCheck', shard_3_master.tablet_alias, 'replica'],
            auto_log=True)
        for master in [shard_2_master, shard_3_master]:
            utils.check_tablet_query_service(self, master, False, False)
            stream_health = utils.run_vtctl_json(
                ['VtTabletStreamHealth', '-count', '1', master.tablet_alias])
            logging.debug('Got health: %s', str(stream_health))
            self.assertIn('realtime_stats', stream_health)
            self.assertNotIn('serving', stream_health)

        # check the destination master 3 is healthy, even though its query
        # service is not running (if it were not healthy, this call would raise)
        shard_3_master.get_healthz()

        # now serve rdonly from the split shards, in test_nj only
        utils.run_vtctl([
            'MigrateServedTypes', '--cells=test_nj', 'test_keyspace/80-',
            'rdonly'
        ],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_srv_keyspace('test_ny',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # now serve rdonly from the split shards, everywhere
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_srv_keyspace('test_ny',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # then serve replica from the split shards
        destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-c0 c0-\n',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
            auto_log=True)
        # After a backwards migration, queryservice should be enabled on
        # source and disabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, True, False)
        # Destination tablets would have query service disabled for other
        # reasons than the migration, so check the shard record instead of
        # the tablets directly.
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, False)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)
        # Destination tablets would have query service disabled for other
        # reasons than the migration, so check the shard record instead of
        # the tablets directly
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-c0 c0-\n',
                                 keyspace_id_type=keyspace_id_type)

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl([
            'PlannedReparentShard', 'test_keyspace/80-c0',
            shard_2_replica1.tablet_alias
        ])

        # update our test variables to point at the new master
        shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master

        logging.debug(
            'Inserting lots of data on source shard after reparenting')
        self._insert_lots(3000, base=2000)
        logging.debug('Checking 80 percent of data was sent fairly quickly')
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use vtworker to compare the data again
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug('DELAY 1: %s max_lag=%d avg_lag=%d',
                      monitor_thread_1.object_name, monitor_thread_1.max_lag,
                      monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
        logging.debug('DELAY 2: %s max_lag=%d avg_lag=%d',
                      monitor_thread_2.object_name, monitor_thread_2.max_lag,
                      monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

        # mess with the SourceShard records to test 'vtctl SourceShardDelete'
        # and 'vtctl SourceShardAdd'
        utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                        auto_log=True)
        utils.run_vtctl([
            'SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-', '0',
            'test_keyspace/80-'
        ],
                        auto_log=True)

        # then serve master from the split shards, make sure the source master's
        # query service is now turned off
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-c0 c0-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-c0 c0-\n',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_tablet_query_service(self, shard_1_master, False, True)

        # check the binlog players are gone now
        shard_2_master.wait_for_binlog_player_count(0)
        shard_3_master.wait_for_binlog_player_count(0)

        # get status for a destination master tablet, make sure it's good
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('No binlog player is running', shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # delete the original tablets in the original shard
        tablet.kill_tablets([
            shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
            shard_1_rdonly1
        ])
        for t in [
                shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
                shard_1_rdonly1
        ]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        utils.run_vtctl(
            ['DeleteTablet', '-allow_master', shard_1_master.tablet_alias],
            auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                        auto_log=True,
                        expect_fail=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                        auto_log=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                        auto_log=True)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
        self.assertNotIn('cells', shard)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

        # kill everything
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_2_master,
            shard_2_replica1, shard_2_replica2, shard_3_master,
            shard_3_replica, shard_3_rdonly1
        ])
Example #29
0
    def test_resharding(self):
        # create the keyspace with just one shard
        shard_master.init_tablet('replica',
                                 keyspace='test_keyspace',
                                 shard='0',
                                 tablet_index=0)
        shard_replica.init_tablet('replica',
                                  keyspace='test_keyspace',
                                  shard='0',
                                  tablet_index=1)
        shard_rdonly1.init_tablet('rdonly',
                                  keyspace='test_keyspace',
                                  shard='0',
                                  tablet_index=2)

        for t in [shard_master, shard_replica, shard_rdonly1]:
            t.create_db('vt_test_keyspace')

        # replica is not started, so InitShardMaster is expected to fail
        shard_master.start_vttablet(wait_for_state=None,
                                    binlog_use_v3_resharding_mode=False)
        shard_rdonly1.start_vttablet(wait_for_state=None,
                                     binlog_use_v3_resharding_mode=False)

        for t in [shard_master, shard_rdonly1]:
            t.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work - expect fail
        # because replica tablet is not up
        _, stderr = utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/0',
            shard_master.tablet_alias
        ],
                                    auto_log=True,
                                    expect_fail=True)

        self.assertIn('tablet test_nj-0000062345 ResetReplication failed',
                      stderr)
        # start replica
        shard_replica.start_vttablet(wait_for_state=None,
                                     binlog_use_v3_resharding_mode=False)

        shard_replica.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/0',
            shard_master.tablet_alias
        ],
                        auto_log=True)

        utils.wait_for_tablet_type(shard_replica.tablet_alias, 'replica')
        utils.wait_for_tablet_type(shard_rdonly1.tablet_alias, 'rdonly')
        for t in [shard_master, shard_replica, shard_rdonly1]:
            t.wait_for_vttablet_state('SERVING')

        # create the tables and add startup values
        self._create_schema()
        self._insert_startup_values()

        # reload schema on all tablets so we can query them
        for t in [shard_master, shard_replica, shard_rdonly1]:
            utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True)

        # We must start vtgate after the tablets are up, or else wait for the
        # 1min refresh (the tablet_refresh_interval parameter of the discovery
        # gateway). We want cache_ttl at zero so we re-read the topology for
        # every test query.

        utils.VtGate().start(
            cache_ttl='0',
            tablets=[shard_master, shard_replica, shard_rdonly1])
        utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1)

        # check the Map Reduce API works correctly; it should use
        # ExecuteShards, as we're not sharded yet.
        # We have 3 values in the database; asking for 4 splits will still
        # get us a single query.
        sql = 'select id, msg from resharding1'
        s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
        self.assertEqual(len(s), 1)
        self.assertEqual(s[0]['shard_part']['shards'][0], '0')

        # change the schema, backfill keyspace_id, and change schema again
        self._add_sharding_key_to_schema()
        self._backfill_keyspace_id(shard_master)
        self._mark_sharding_key_not_null()
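
        # This is the classic online backfill pattern: first add the sharding
        # key as a nullable column, then populate it for existing rows, and
        # only then mark it NOT NULL, so writes never break in between.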

        # now we can be a sharded keyspace (and propagate to SrvKeyspace)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col',
            base_sharding.keyspace_id_type
        ])
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # run a health check on source replica so it responds to discovery
        utils.run_vtctl(['RunHealthCheck', shard_replica.tablet_alias])

        # create the split shards
        shard_0_master.init_tablet('replica',
                                   keyspace='test_keyspace',
                                   shard='-80',
                                   tablet_index=0)
        shard_0_replica.init_tablet('replica',
                                    keyspace='test_keyspace',
                                    shard='-80',
                                    tablet_index=1)
        shard_0_rdonly1.init_tablet('rdonly',
                                    keyspace='test_keyspace',
                                    shard='-80',
                                    tablet_index=2)
        shard_1_master.init_tablet('replica',
                                   keyspace='test_keyspace',
                                   shard='80-',
                                   tablet_index=0)
        shard_1_replica.init_tablet('replica',
                                    keyspace='test_keyspace',
                                    shard='80-',
                                    tablet_index=1)
        shard_1_rdonly1.init_tablet('rdonly',
                                    keyspace='test_keyspace',
                                    shard='80-',
                                    tablet_index=2)

        for t in [
                shard_0_master, shard_0_replica, shard_0_rdonly1,
                shard_1_master, shard_1_replica, shard_1_rdonly1
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             binlog_use_v3_resharding_mode=False)

        for t in [
                shard_0_master, shard_0_replica, shard_0_rdonly1,
                shard_1_master, shard_1_replica, shard_1_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        for t in [shard_0_replica, shard_1_replica]:
            utils.wait_for_tablet_type(t.tablet_alias, 'replica')
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            utils.wait_for_tablet_type(t.tablet_alias, 'rdonly')

        sharded_tablets = [
            shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master,
            shard_1_replica, shard_1_rdonly1
        ]
        for t in sharded_tablets:
            t.wait_for_vttablet_state('SERVING')

        # We must restart vtgate after the tablets are up, or else wait for
        # the 1min refresh. We want cache_ttl at zero so we re-read the
        # topology for every test query.
        utils.vtgate.kill()

        utils.vtgate = None
        utils.VtGate().start(cache_ttl='0',
                             tablets=[
                                 shard_master, shard_replica, shard_rdonly1,
                                 shard_0_master, shard_0_replica,
                                 shard_0_rdonly1, shard_1_master,
                                 shard_1_replica, shard_1_rdonly1
                             ])
        var = None

        # Wait for the endpoints, either local or remote.
        utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.replica',
                                        1,
                                        var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1, var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.replica',
                                        1,
                                        var=var)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1, var=var)

        # check the Map Reduce API works correctly; it should use
        # ExecuteKeyRanges now, as we are sharded (with just one shard).
        # Again, we have 3 values in the database; asking for 4 splits will
        # still get us a single query.
        sql = 'select id, msg from resharding1'
        s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
        self.assertEqual(len(s), 1)
        self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
        # There must be one empty KeyRange which represents the full keyspace.
        self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
        self.assertEqual(s[0]['key_range_part']['key_ranges'][0], {})

        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -\n'
            'Partitions(replica): -\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)
        utils.run_vtctl(['RunHealthCheck', shard_rdonly1.tablet_alias])

        # Run vtworker as daemon for the following SplitClone commands.
        worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
            [
                '--cell', 'test_nj', '--command_display_interval', '10ms',
                '--use_v3_resharding_mode=false'
            ],
            auto_log=True)

        # Initial clone (online).
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 3, 0, 0, 0)

        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Modify the destination shard. SplitClone will revert the changes.
        # Delete row 1 (provokes an insert).
        shard_0_master.mquery('vt_test_keyspace',
                              'delete from resharding1 where id=1',
                              write=True)
        # Delete row 2 (provokes an insert).
        shard_1_master.mquery('vt_test_keyspace',
                              'delete from resharding1 where id=2',
                              write=True)
        # Update row 3 (provokes an update).
        shard_1_master.mquery(
            'vt_test_keyspace',
            "update resharding1 set msg='msg-not-3' where id=3",
            write=True)
        # Insert row 4 (provokes a delete).
        self._insert_value(shard_1_master, 'resharding1', 4, 'msg4',
                           0xD000000000000000)
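
        # Four changes for the second clone to revert: two deleted rows to
        # re-insert (ids 1 and 2), one row to update back (id 3) and one
        # stray row to delete (id 4, whose keyspace_id 0xd0... belongs to
        # shard 80-). Assuming (inserts, updates, deletes, equal rows) order,
        # that matches the Online counters (2, 1, 1, 0) below; the Offline
        # phase then finds the three startup rows identical (0, 0, 0, 3).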

        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--exclude_tables', 'unrelated', '--chunk_count',
            '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets',
            '1', 'test_keyspace/0'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 2, 1, 1, 0)
        self.verify_reconciliation_counters(worker_port, 'Offline',
                                            'resharding1', 0, 0, 0, 3)
        # Terminate worker daemon because it is no longer needed.
        utils.kill_sub_process(worker_proc, soft=True)

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                        auto_log=True)

        # check the binlog players are running
        logging.debug('Waiting for binlog players to start on new masters...')
        self.check_destination_master(shard_0_master, ['test_keyspace/0'])
        self.check_destination_master(shard_1_master, ['test_keyspace/0'])

        # check that binlog server exported the stats vars
        self.check_binlog_server_vars(shard_replica, horizontal=True)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 5)
        if v != 100:
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 20)
        logging.debug('Checking no data was sent the wrong way')
        self._check_lots_not_present(1000)
        self.check_binlog_player_vars(shard_0_master, ['test_keyspace/0'],
                                      seconds_behind_master_max=30)
        self.check_binlog_player_vars(shard_1_master, ['test_keyspace/0'],
                                      seconds_behind_master_max=30)
        self.check_binlog_server_vars(shard_replica,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)

        # use vtworker to compare the data
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

        if base_sharding.use_multi_split_diff:
            logging.debug('Running vtworker MultiSplitDiff for 0')
            utils.run_vtworker([
                '-cell', 'test_nj', '--use_v3_resharding_mode=false',
                'MultiSplitDiff', '--min_healthy_rdonly_tablets', '1',
                'test_keyspace/0'
            ],
                               auto_log=True)
        else:
            logging.debug('Running vtworker SplitDiff for -80')
            utils.run_vtworker([
                '-cell', 'test_nj', '--use_v3_resharding_mode=false',
                'SplitDiff', '--min_healthy_rdonly_tablets', '1',
                'test_keyspace/-80'
            ],
                               auto_log=True)
            logging.debug('Running vtworker SplitDiff for 80-')
            utils.run_vtworker([
                '-cell', 'test_nj', '--use_v3_resharding_mode=false',
                'SplitDiff', '--min_healthy_rdonly_tablets', '1',
                'test_keyspace/80-'
            ],
                               auto_log=True)

        utils.pause('Good time to test vtworker for diffs')

        # get status for the destination master tablet, make sure we have it all
        self.check_running_binlog_player(shard_0_master, 2000, 2000)
        self.check_running_binlog_player(shard_1_master, 6000, 2000)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        expect_fail=True)

        # now serve rdonly from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # make sure rdonly tablets are back to serving before hitting vtgate.
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            t.wait_for_vttablet_state('SERVING')

        utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1)

        # check the Map Reduce API works correctly, should use ExecuteKeyRanges
        # on both destination shards now.
        # we ask for 2 splits to only have one per shard
        sql = 'select id, msg from resharding1'
        timeout = 10.0
        while True:
            try:
                s = utils.vtgate.split_query(sql, 'test_keyspace', 2)
                break
            except Exception:  # pylint: disable=broad-except
                timeout = utils.wait_step(
                    'vtgate executes split_query properly', timeout)
        self.assertEqual(len(s), 2)
        self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
        self.assertEqual(s[1]['key_range_part']['keyspace'], 'test_keyspace')
        self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
        self.assertEqual(len(s[1]['key_range_part']['key_ranges']), 1)
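
        # With two serving destination shards, split_query now returns one
        # split per shard, each carrying a single KeyRange (presumably -80
        # and 80- respectively).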

        # then serve replica from the split shards
        source_tablet = shard_replica
        destination_tablets = [shard_0_replica, shard_1_replica]

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
            auto_log=True)
        # After a backwards migration, queryservice should be enabled on
        # source and disabled on destinations
        utils.check_tablet_query_service(self, source_tablet, True, False)
        utils.check_tablet_query_services(self, destination_tablets, False,
                                          True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, source_tablet, False, True)
        utils.check_tablet_query_services(self, destination_tablets, True,
                                          False)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # then serve master from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # check the binlog players are gone now
        self.check_no_binlog_player(shard_0_master)
        self.check_no_binlog_player(shard_1_master)

        # make sure we can't delete a shard with tablets
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

        # remove the original tablets in the original shard
        tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1])
        for t in [shard_replica, shard_rdonly1]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        utils.run_vtctl(
            ['DeleteTablet', '-allow_master', shard_master.tablet_alias],
            auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

        # kill everything else
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master,
            shard_1_replica, shard_1_rdonly1
        ])
Example #30
0
  def test_merge_sharding(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'custom_sharding_key',
                     '--sharding_column_type', keyspace_id_type,
                     '--split_shard_count', '4',
                     'test_keyspace'])

    shard_0_master.init_tablet('master', 'test_keyspace', '-40')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-40')
    shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40')
    shard_1_master.init_tablet('master', 'test_keyspace', '40-80')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80')
    shard_2_master.init_tablet('master', 'test_keyspace', '80-')
    shard_2_replica.init_tablet('replica', 'test_keyspace', '80-')
    shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['split_shard_count'], 4)

    # create databases so vttablet can start behaving normally
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)

    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.wait_for_vttablet_state('SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/-40',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/40-80',
                     shard_1_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-',
                     shard_2_master.tablet_alias], auto_log=True)

    # create the tables
    self._create_schema()
    self._insert_startup_values()

    # run a health check on source replicas so they respond to discovery
    # (for binlog players) and on the source rdonlys (for workers)
    for t in [shard_0_replica, shard_1_replica]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'replica'])
    for t in [shard_0_rdonly, shard_1_rdonly]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])

    # create the merge shards
    shard_dest_master.init_tablet('master', 'test_keyspace', '-80')
    shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')

    # start vttablet on the destination shard (no db created,
    # so they're all not serving)
    for t in [shard_dest_replica, shard_dest_rdonly]:
      t.start_vttablet(wait_for_state=None)
    # Start the destination master with healthcheck enabled (necessary for
    # resolving the destination master).
    shard_dest_master.start_vttablet(wait_for_state=None,
                                     target_tablet_type='replica')
    for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/-80',
                     shard_dest_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -40 40-80 80-\n'
        'Partitions(rdonly): -40 40-80 80-\n'
        'Partitions(replica): -40 40-80 80-\n',
        keyspace_id_type=keyspace_id_type,
        sharding_column_name='custom_sharding_key')
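    # A minimal illustrative sketch (not part of the original test): a shard
    # name like '40-80' covers the uint64 keyspace_ids whose leading byte
    # falls in the hex range [0x40, 0x80). The helper below is hypothetical
    # and only documents the convention the partition checks above rely on.
    def _shard_for_keyspace_id(kid, shards=('-40', '40-80', '80-')):
      first_byte = (kid >> 56) & 0xff  # assumes an 8-byte uint64 keyspace_id
      for name in shards:
        start, _, end = name.partition('-')
        lo = int(start, 16) if start else 0x00
        hi = int(end, 16) if end else 0x100
        if lo <= first_byte < hi:
          return name
    assert _shard_for_keyspace_id(0x5000000000000000) == '40-80'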

    # copy the schema
    utils.run_vtctl(['CopySchemaShard', shard_0_rdonly.tablet_alias,
                     'test_keyspace/-80'], auto_log=True)

    # copy the data (will also start filtered replication), reset source
    utils.run_vtworker(['--cell', 'test_nj',
                        '--command_display_interval', '10ms',
                        'SplitClone',
                        '--source_reader_count', '10',
                        '--min_table_size_for_split', '1',
                        '--min_healthy_rdonly_endpoints', '1',
                        'test_keyspace/-80'],
                       auto_log=True)
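    # SplitClone presumably takes the source rdonly tablets out of the
    # serving pool while it copies, so restore their tablet type to rdonly
    # once the clone is done.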
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check binlog player variables
    self.check_destination_master(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(shard_0_replica, horizontal=True)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True)

    # testing filtered replication: insert a bunch of data on shard 0 and 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shards')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 10)
    if v != 100:
      # small optimization: only do this check if we don't have all the data
      # already anyway.
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 30)
    self.check_binlog_player_vars(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(shard_0_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)

    # use vtworker to compare the data (after health-checking the destination
    # rdonly tablets so discovery works)
    utils.run_vtctl(['RunHealthCheck', shard_dest_rdonly.tablet_alias,
                     'rdonly'])
    logging.debug('Running vtworker SplitDiff on first half')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_endpoints', '1',
                        '--source_uid', '0',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    logging.debug('Running vtworker SplitDiff on second half')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_endpoints', '1',
                        '--source_uid', '1',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)

    # get status for the destination master tablet, make sure we have it all
    self.check_running_binlog_player(shard_dest_master, 3000, 1000)

    # check destination master query service is not running
    utils.check_tablet_query_service(self, shard_dest_master, False, False)
    stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                          '-count', '1',
                                          shard_dest_master.tablet_alias])
    logging.debug('Got health: %s', str(stream_health))
    self.assertIn('realtime_stats', stream_health)
    self.assertNotIn('serving', stream_health)
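    # Note: the JSON form of a proto3 message omits fields at their default
    # value, so a tablet that is not serving simply has no 'serving' key,
    # which is what assertNotIn verifies above.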

    # check the destination master is healthy, even though its query
    # service is not running (get_healthz would raise if it were unhealthy)
    shard_dest_master.get_healthz()

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -40 40-80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_sharding_key')

    # now serve replica from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_sharding_key')

    # now serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_sharding_key')
    utils.check_tablet_query_service(self, shard_0_master, False, True)
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_dest_master)

    # kill the original tablets in the original shards
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                         shard_1_master, shard_1_replica, shard_1_rdonly])
    for t in [shard_0_replica, shard_0_rdonly,
              shard_1_replica, shard_1_rdonly]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    for t in [shard_0_master, shard_1_master]:
      utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                      auto_log=True)

    # delete the original shards
    utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True)
    utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_2_master, shard_2_replica, shard_2_rdonly,
                         shard_dest_master, shard_dest_replica,
                         shard_dest_rdonly])
Example #31
0
    def test_resharding(self):
        # create the keyspace with just one shard
        shard_master.init_tablet('master',
                                 keyspace='test_keyspace',
                                 shard='0',
                                 tablet_index=0)
        shard_replica.init_tablet('replica',
                                  keyspace='test_keyspace',
                                  shard='0',
                                  tablet_index=1)
        shard_rdonly1.init_tablet('rdonly',
                                  keyspace='test_keyspace',
                                  shard='0',
                                  tablet_index=2)

        for t in [shard_master, shard_replica, shard_rdonly1]:
            t.create_db('vt_test_keyspace')

        shard_master.start_vttablet(wait_for_state=None)
        shard_replica.start_vttablet(wait_for_state=None)
        shard_rdonly1.start_vttablet(wait_for_state=None)

        shard_master.wait_for_vttablet_state('SERVING')
        for t in [shard_replica, shard_rdonly1]:
            t.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/0',
            shard_master.tablet_alias
        ],
                        auto_log=True)

        utils.wait_for_tablet_type(shard_replica.tablet_alias, 'replica')
        utils.wait_for_tablet_type(shard_rdonly1.tablet_alias, 'rdonly')
        for t in [shard_master, shard_replica, shard_rdonly1]:
            t.wait_for_vttablet_state('SERVING')

        # create the tables and add startup values
        self._create_schema()
        self._insert_startup_values()

        # reload schema on all tablets so we can query them
        for t in [shard_master, shard_replica, shard_rdonly1]:
            utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True)

        # vtgate must be started after the tablets are up, or it will wait for
        # the 1-minute topology refresh. We want cache_ttl at zero so the
        # topology is re-read for every test query.
        utils.VtGate().start(
            cache_ttl='0',
            tablets=[shard_master, shard_replica, shard_rdonly1])
        utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1)

        # check the Map Reduce API works correctly; it should use
        # ExecuteShards, as we're not sharded yet.
        # We have 3 values in the database, so asking for 4 splits gets us
        # a single query.
        sql = 'select id, msg from resharding1'
        s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
        self.assertEqual(len(s), 1)
        self.assertEqual(s[0]['shard_part']['shards'][0], '0')

        # change the schema, backfill keyspace_id, and change schema again
        self._add_sharding_key_to_schema()
        self._backfill_keyspace_id(shard_master)
        self._mark_sharding_key_not_null()

        # now we can be a sharded keyspace (and propagate to SrvKeyspace)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'keyspace_id',
            keyspace_id_type
        ])
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # run a health check on source replica so it responds to discovery
        utils.run_vtctl(['RunHealthCheck', shard_replica.tablet_alias])

        # create the split shards
        shard_0_master.init_tablet('master',
                                   keyspace='test_keyspace',
                                   shard='-80',
                                   tablet_index=0)
        shard_0_replica.init_tablet('replica',
                                    keyspace='test_keyspace',
                                    shard='-80',
                                    tablet_index=1)
        shard_0_rdonly1.init_tablet('rdonly',
                                    keyspace='test_keyspace',
                                    shard='-80',
                                    tablet_index=2)
        shard_1_master.init_tablet('master',
                                   keyspace='test_keyspace',
                                   shard='80-',
                                   tablet_index=0)
        shard_1_replica.init_tablet('replica',
                                    keyspace='test_keyspace',
                                    shard='80-',
                                    tablet_index=1)
        shard_1_rdonly1.init_tablet('rdonly',
                                    keyspace='test_keyspace',
                                    shard='80-',
                                    tablet_index=2)

        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_replica
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None)
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None)

        for t in [shard_0_master, shard_1_master]:
            t.wait_for_vttablet_state('SERVING')
        for t in [
                shard_0_replica, shard_0_rdonly1, shard_1_replica,
                shard_1_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        for t in [shard_0_replica, shard_1_replica]:
            utils.wait_for_tablet_type(t.tablet_alias, 'replica')
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            utils.wait_for_tablet_type(t.tablet_alias, 'rdonly')

        sharded_tablets = [
            shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master,
            shard_1_replica, shard_1_rdonly1
        ]
        for t in sharded_tablets:
            t.wait_for_vttablet_state('SERVING')

        # vtgate must be restarted after the tablets are up, or it will wait
        # for the 1-minute topology refresh. We want cache_ttl at zero so the
        # topology is re-read for every test query.
        utils.vtgate.kill()
        utils.VtGate().start(cache_ttl='0',
                             tablets=[
                                 shard_master, shard_replica, shard_rdonly1,
                                 shard_0_master, shard_0_replica,
                                 shard_0_rdonly1, shard_1_master,
                                 shard_1_replica, shard_1_rdonly1
                             ])
        utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.replica', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.replica', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1)

        # check the Map Reduce API works correctly; it should use
        # ExecuteKeyRanges now, as we are sharded (with just one shard).
        # Again, we have 3 values in the database, so asking for 4 splits
        # gets us a single query.
        sql = 'select id, msg from resharding1'
        s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
        self.assertEqual(len(s), 1)
        self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
        # There must be one empty KeyRange which represents the full keyspace.
        self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
        self.assertEqual(s[0]['key_range_part']['key_ranges'][0], {})

        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace', 'Partitions(master): -\n'
                                 'Partitions(rdonly): -\n'
                                 'Partitions(replica): -\n',
                                 keyspace_id_type=keyspace_id_type)

        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)
        utils.run_vtctl(['RunHealthCheck', shard_rdonly1.tablet_alias])

        utils.run_vtworker([
            '--cell', 'test_nj', '--command_display_interval', '10ms',
            'SplitClone', '--exclude_tables', 'unrelated',
            '--source_reader_count', '10', '--min_table_size_for_split', '1',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/0'
        ],
                           auto_log=True)

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                        auto_log=True)

        # check the binlog players are running
        logging.debug('Waiting for binlog players to start on new masters...')
        self.check_destination_master(shard_0_master, ['test_keyspace/0'])
        self.check_destination_master(shard_1_master, ['test_keyspace/0'])

        # check that binlog server exported the stats vars
        self.check_binlog_server_vars(shard_replica, horizontal=True)

        # testing filtered replication: insert a bunch of data on the source shard,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 5)
        if v != 100:
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 20)
        logging.debug('Checking no data was sent the wrong way')
        self._check_lots_not_present(1000)
        self.check_binlog_player_vars(shard_0_master, ['test_keyspace/0'],
                                      seconds_behind_master_max=30)
        self.check_binlog_player_vars(shard_1_master, ['test_keyspace/0'],
                                      seconds_behind_master_max=30)
        self.check_binlog_server_vars(shard_replica,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)

        # use vtworker to compare the data
        logging.debug('Running vtworker SplitDiff for -80')
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--min_healthy_rdonly_tablets',
            '1', 'test_keyspace/-80'
        ],
                           auto_log=True)

        logging.debug('Running vtworker SplitDiff for 80-')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--min_healthy_rdonly_tablets',
            '1', 'test_keyspace/80-'
        ],
                           auto_log=True)

        utils.pause('Good time to test vtworker for diffs')

        # get status for the destination master tablet, make sure we have it all
        self.check_running_binlog_player(shard_0_master, 2000, 2000)
        self.check_running_binlog_player(shard_1_master, 6000, 2000)
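        # (The two numeric arguments are presumably minimum statement and
        # transaction counts the players must have replayed; the thresholds
        # differ because the test rows do not hash evenly across the two
        # destination shards.)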

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        expect_fail=True)

        # now serve rdonly from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace', 'Partitions(master): -\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -\n',
                                 keyspace_id_type=keyspace_id_type)

        # make sure rdonly tablets are back to serving before hitting vtgate.
        for t in [shard_0_rdonly1, shard_1_rdonly1]:
            t.wait_for_vttablet_state('SERVING')
        utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1)
        utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1)

        # check the Map Reduce API works correctly; it should use
        # ExecuteKeyRanges on both destination shards now.
        # We ask for 2 splits so there is exactly one per shard.
        sql = 'select id, msg from resharding1'
        s = utils.vtgate.split_query(sql, 'test_keyspace', 2)
        self.assertEqual(len(s), 2)
        self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
        self.assertEqual(s[1]['key_range_part']['keyspace'], 'test_keyspace')
        self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
        self.assertEqual(len(s[1]['key_range_part']['key_ranges']), 1)

        # then serve replica from the split shards
        source_tablet = shard_replica
        destination_tablets = [shard_0_replica, shard_1_replica]

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace', 'Partitions(master): -\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
            auto_log=True)
        # After a backwards migration, queryservice should be enabled on
        # source and disabled on destinations
        utils.check_tablet_query_service(self, source_tablet, True, False)
        utils.check_tablet_query_services(self, destination_tablets, False,
                                          True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace', 'Partitions(master): -\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -\n',
                                 keyspace_id_type=keyspace_id_type)

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, source_tablet, False, True)
        utils.check_tablet_query_services(self, destination_tablets, True,
                                          False)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace', 'Partitions(master): -\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)

        # then serve master from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type)

        # check the binlog players are gone now
        self.check_no_binlog_player(shard_0_master)
        self.check_no_binlog_player(shard_1_master)

        # make sure we can't delete a shard with tablets
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

        # remove the original tablets in the original shard
        tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1])
        for t in [shard_replica, shard_rdonly1]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        utils.run_vtctl(
            ['DeleteTablet', '-allow_master', shard_master.tablet_alias],
            auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

        # kill everything else
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_rdonly1, shard_1_master,
            shard_1_replica, shard_1_rdonly1
        ])
Example #32
0
  def test_merge_sharding(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'custom_ksid_col',
                     '--sharding_column_type', base_sharding.keyspace_id_type,
                     'test_keyspace'])

    shard_0_master.init_tablet('replica', 'test_keyspace', '-40')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-40')
    shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-40')
    shard_1_master.init_tablet('replica', 'test_keyspace', '40-80')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '40-80')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '40-80')
    shard_2_master.init_tablet('replica', 'test_keyspace', '80-')
    shard_2_replica.init_tablet('replica', 'test_keyspace', '80-')
    shard_2_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    # rebuild and check SrvKeyspace
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

    # create databases so vttablet can start behaving normally
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None,
                       binlog_use_v3_resharding_mode=False)

    # won't be serving, no replication state
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly,
              shard_2_master, shard_2_replica, shard_2_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-40',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/40-80',
                     shard_1_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                     shard_2_master.tablet_alias], auto_log=True)

    # create the tables
    self._create_schema()
    self._insert_startup_values()

    # run a health check on source replicas so they respond to discovery
    # (for binlog players) and on the source rdonlys (for workers)
    for t in [shard_0_replica, shard_1_replica]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
    for t in [shard_0_rdonly, shard_1_rdonly]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

    # create the merge shards
    shard_dest_master.init_tablet('replica', 'test_keyspace', '-80')
    shard_dest_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_dest_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')

    # start vttablet on the destination shard (no db created,
    # so they're all not serving)
    for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
      t.start_vttablet(wait_for_state=None,
                       binlog_use_v3_resharding_mode=False)
    for t in [shard_dest_master, shard_dest_replica, shard_dest_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                     shard_dest_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -40 40-80 80-\n'
        'Partitions(rdonly): -40 40-80 80-\n'
        'Partitions(replica): -40 40-80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    # copy the schema
    utils.run_vtctl(['CopySchemaShard', shard_0_rdonly.tablet_alias,
                     'test_keyspace/-80'], auto_log=True)

    # copy the data (will also start filtered replication), reset source
    # Run vtworker as daemon for the following SplitClone commands.
    worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
        ['--cell', 'test_nj', '--command_display_interval', '10ms',
          '--use_v3_resharding_mode=false'],
        auto_log=True)

    # Initial clone (online).
    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--offline=false',
         '--chunk_count', '10',
         '--min_rows_per_chunk', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/-80'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        2, 0, 0, 0)

    # Reset vtworker such that we can run the next command.
    workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
    utils.wait_procs([workerclient_proc])

    # Modify the destination shard. SplitClone will revert the changes.
    # Delete row 1 (provokes an insert).
    shard_dest_master.mquery('vt_test_keyspace',
                             'delete from resharding1 where id=1', write=True)
    # Update row 2 (provokes an update).
    shard_dest_master.mquery(
        'vt_test_keyspace', "update resharding1 set msg='msg-not-2' where id=2",
        write=True)
    # Insert row 0 (provokes a delete).
    self._insert_value(shard_dest_master, 'resharding1', 0, 'msg0',
                       0x5000000000000000)
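    # The offline phase of the next SplitClone should reconcile exactly these
    # three rows: re-insert id=1, restore id=2, and delete id=0; the
    # reconciliation counters verified below reflect that.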

    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--chunk_count', '10',
         '--min_rows_per_chunk', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/-80'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    # Change tablets, which were taken offline, back to rdonly.
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        1, 1, 1, 0)
    self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1',
                                        0, 0, 0, 2)
    # Terminate worker daemon because it is no longer needed.
    utils.kill_sub_process(worker_proc, soft=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check binlog player variables
    self.check_destination_master(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(shard_0_replica, horizontal=True)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True)

    # testing filtered replication: insert a bunch of data on shard 0 and 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shards')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 10)
    if v != 100:
      # small optimization: only do this check if we don't have all the data
      # already anyway.
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 30)
    self.check_binlog_player_vars(shard_dest_master,
                                  ['test_keyspace/-40', 'test_keyspace/40-80'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(shard_0_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)
    self.check_binlog_server_vars(shard_1_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)

    # use vtworker to compare the data (after health-checking the destination
    # rdonly tablets so discovery works)
    utils.run_vtctl(['RunHealthCheck', shard_dest_rdonly.tablet_alias])
    logging.debug('Running vtworker SplitDiff on first half')
    utils.run_vtworker(['-cell', 'test_nj',
                        '--use_v3_resharding_mode=false',
                        'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        '--source_uid', '1',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)
    logging.debug('Running vtworker SplitDiff on second half')
    utils.run_vtworker(['-cell', 'test_nj',
                        '--use_v3_resharding_mode=false',
                        'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        '--source_uid', '2',
                        'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_dest_rdonly.tablet_alias,
                     'rdonly'], auto_log=True)

    # get status for the destination master tablet, make sure we have it all
    self.check_running_binlog_player(shard_dest_master, 3000, 1000)

    # check destination master query service is not running
    utils.check_tablet_query_service(self, shard_dest_master, False, False)
    stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                          '-count', '1',
                                          shard_dest_master.tablet_alias])
    logging.debug('Got health: %s', str(stream_health))
    self.assertIn('realtime_stats', stream_health)
    self.assertNotIn('serving', stream_health)

    # check the destination master is healthy, even though its query
    # service is not running (get_healthz would raise if it were unhealthy)
    shard_dest_master.get_healthz()

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -40 40-80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # now serve replica from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -40 40-80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # now serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/-80', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_master, False, True)
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_dest_master)

    # kill the original tablets in the original shards
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                         shard_1_master, shard_1_replica, shard_1_rdonly])
    for t in [shard_0_replica, shard_0_rdonly,
              shard_1_replica, shard_1_rdonly]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    for t in [shard_0_master, shard_1_master]:
      utils.run_vtctl(['DeleteTablet', '-allow_master', t.tablet_alias],
                      auto_log=True)

    # delete the original shards
    utils.run_vtctl(['DeleteShard', 'test_keyspace/-40'], auto_log=True)
    utils.run_vtctl(['DeleteShard', 'test_keyspace/40-80'], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_2_master, shard_2_replica, shard_2_rdonly,
                         shard_dest_master, shard_dest_replica,
                         shard_dest_rdonly])
Example #33
0
  def test_resharding(self):
    # create the keyspace with just one shard
    utils.run_vtctl(['CreateKeyspace',
                     'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                     'keyspace_id', keyspace_id_type])

    shard_master.init_tablet('master', 'test_keyspace', '0')
    shard_replica.init_tablet('replica', 'test_keyspace', '0')
    shard_rdonly1.init_tablet('rdonly', 'test_keyspace', '0')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # create databases so vttablet can start behaving normally
    for t in [shard_master, shard_replica, shard_rdonly1]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)

    # wait for the tablets
    shard_master.wait_for_vttablet_state('SERVING')
    shard_replica.wait_for_vttablet_state('SERVING')
    shard_rdonly1.wait_for_vttablet_state('SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/0',
                     shard_master.tablet_alias], auto_log=True)

    # create the tables and add startup values
    self._create_schema()
    self._insert_startup_values()

    # change the schema, backfill keyspace_id, and change schema again
    self._add_sharding_key_to_schema()
    self._backfill_keyspace_id(shard_master)
    self._mark_sharding_key_not_null()

    # create the split shards
    shard_0_master.init_tablet('master', 'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_rdonly1.init_tablet('rdonly', 'test_keyspace', '-80')
    shard_1_master.init_tablet('master', 'test_keyspace', '80-')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly1,
              shard_1_master, shard_1_replica, shard_1_rdonly1]:
      t.start_vttablet(wait_for_state=None)
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly1,
              shard_1_master, shard_1_replica, shard_1_rdonly1]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -\n' +
                             'Partitions(replica): -\n',
                             keyspace_id_type=keyspace_id_type)

    # we need to create the schema, and the worker will do data copying
    for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
      utils.run_vtctl(['CopySchemaShard',
                       '--exclude_tables', 'unrelated',
                       shard_rdonly1.tablet_alias,
                       keyspace_shard],
                      auto_log=True)

    utils.run_vtworker(['--cell', 'test_nj',
                        '--command_display_interval', '10ms',
                        'SplitClone',
                        '--exclude_tables', 'unrelated',
                        '--strategy=-populate_blp_checkpoint',
                        '--source_reader_count', '10',
                        '--min_table_size_for_split', '1',
                        'test_keyspace/0'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
                     auto_log=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check the binlog players are running
    logging.debug("Waiting for binlog players to start on new masters...")
    shard_0_master.wait_for_binlog_player_count(1)
    shard_1_master.wait_for_binlog_player_count(1)
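    # One binlog player per destination master: each new shard replays the
    # filtered binlog stream of the single source shard, test_keyspace/0.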

    # testing filtered replication: insert a bunch of data on the source shard,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug("Inserting lots of data on source shard")
    self._insert_lots(1000)
    logging.debug("Checking 80 percent of data is sent quickly")
    self._check_lots_timeout(1000, 80, 5)
    logging.debug("Checking all data goes through eventually")
    self._check_lots_timeout(1000, 100, 20)
    logging.debug("Checking no data was sent the wrong way")
    self._check_lots_not_present(1000)

    # use the vtworker checker to compare the data
    logging.debug("Running vtworker SplitDiff for -80")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    logging.debug("Running vtworker SplitDiff for 80-")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/80-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    utils.pause("Good time to test vtworker for diffs")

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    expect_fail=True)

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -\n',
                             keyspace_id_type=keyspace_id_type)

    # then serve replica from the split shards
    source_tablet = shard_replica
    destination_tablets = [shard_0_replica, shard_1_replica]

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)

    # move replica back and forth
    utils.run_vtctl(['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    # After a backwards migration, queryservice should be enabled on source and disabled on destinations
    utils.check_tablet_query_service(self, source_tablet, True, False)
    utils.check_tablet_query_services(self, destination_tablets, False, True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -\n',
                             keyspace_id_type=keyspace_id_type)

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    # After a forwards migration, queryservice should be disabled on source and enabled on destinations
    utils.check_tablet_query_service(self, source_tablet, False, True)
    utils.check_tablet_query_services(self, destination_tablets, True, False)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)

    # then serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)

    # check the binlog players are gone now
    shard_0_master.wait_for_binlog_player_count(0)
    shard_1_master.wait_for_binlog_player_count(0)

    # make sure we can't delete a shard with tablets
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

    # scrap the original tablets in the original shard
    for t in [shard_master, shard_replica, shard_rdonly1]:
      utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
    tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1])
    for t in [shard_master, shard_replica, shard_rdonly1]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly1,
                         shard_1_master, shard_1_replica, shard_1_rdonly1])
Example #34
0
  def test_resharding(self):
    # create the keyspace with just one shard
    utils.run_vtctl(['CreateKeyspace',
                     'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                     'keyspace_id', keyspace_id_type])

    shard_master.init_tablet('master', 'test_keyspace', '0')
    shard_replica.init_tablet('replica', 'test_keyspace', '0')
    shard_rdonly.init_tablet('rdonly', 'test_keyspace', '0')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # create databases so vttablet can start behaving normally
    for t in [shard_master, shard_replica, shard_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)

    # wait for the tablets
    shard_master.wait_for_vttablet_state('SERVING')
    shard_replica.wait_for_vttablet_state('SERVING')
    shard_rdonly.wait_for_vttablet_state('SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/0',
                     shard_master.tablet_alias], auto_log=True)

    # create the tables and add startup values
    self._create_schema()
    self._insert_startup_values()

    # change the schema, backfill keyspace_id, and change schema again
    self._add_sharding_key_to_schema()
    self._backfill_keyspace_id(shard_master)
    self._mark_sharding_key_not_null()

    # create the split shards
    shard_0_master.init_tablet('master', 'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
    shard_1_master.init_tablet('master', 'test_keyspace', '80-')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly]:
      t.start_vttablet(wait_for_state=None)
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly]:
      t.wait_for_vttablet_state('CONNECTING')

    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -\n' +
                             'Partitions(replica): -\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # take the snapshot for the split
    utils.run_vtctl(['MultiSnapshot', '--spec=-80-',
                     shard_replica.tablet_alias], auto_log=True)
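    # The spec '-80-' presumably lists the split points, so the snapshot is
    # cut into the two target keyranges -80 and 80-.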

    # wait for tablet's binlog server service to be enabled after snapshot
    shard_replica.wait_for_binlog_server_state("Enabled")

    # perform the restore.
    utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                     'test_keyspace/-80', shard_replica.tablet_alias],
                    auto_log=True)
    utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                     'test_keyspace/80-', shard_replica.tablet_alias],
                    auto_log=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check the binlog players are running
    shard_0_master.wait_for_binlog_player_count(1)
    shard_1_master.wait_for_binlog_player_count(1)

    # testing filtered replication: insert a bunch of data on the source shard,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug("Inserting lots of data on source shard")
    self._insert_lots(1000)
    logging.debug("Checking 80 percent of data is sent quickly")
    self._check_lots_timeout(1000, 80, 5)
    logging.debug("Checking all data goes through eventually")
    self._check_lots_timeout(1000, 100, 20)
    logging.debug("Checking no data was sent the wrong way")
    self._check_lots_not_present(1000)

    # use the vtworker checker to compare the data
    logging.debug("Running vtworker SplitDiff for -80")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/-80'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)

    logging.debug("Running vtworker SplitDiff for 80-")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/80-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)

    utils.pause("Good time to test vtworker for diffs")

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    expect_fail=True)
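    # MigrateServedTypes enforces an order: rdonly and replica must be
    # migrated off the source shard before the master can be.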

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # then serve replica from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # move replica back and forth
    utils.run_vtctl(['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # then serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # check the binlog players are gone now
    shard_0_master.wait_for_binlog_player_count(0)
    shard_1_master.wait_for_binlog_player_count(0)

    # make sure we can't delete a shard with tablets
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

    # scrap the original tablets in the original shard
    for t in [shard_master, shard_replica, shard_rdonly]:
      utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
    tablet.kill_tablets([shard_master, shard_replica, shard_rdonly])

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly,
                         shard_1_master, shard_1_replica, shard_1_rdonly])
Example #35
0
    def test_resharding(self):
        # we're going to reparent and swap these two
        global shard_2_master, shard_2_replica1

        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'bad_column',
            '--sharding_column_type', 'bytes', 'test_keyspace'
        ])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col',
            'uint64'
        ],
                        expect_fail=True)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', 'test_keyspace',
            'custom_ksid_col', base_sharding.keyspace_id_type
        ])
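        # The un-forced SetKeyspaceShardingInfo above must fail: the keyspace
        # was created with deliberately wrong sharding info, and overwriting
        # it requires -force.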

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
        shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        ks = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
        self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = (
            base_sharding.keyspace_id_type == keyrange_constants.KIT_BYTES)

        # create databases so vttablet can start behaving somewhat normally
        for t in [
                shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly1
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             full_mycnf_args=full_mycnf_args)

        # wait for the tablets (replication is not set up, so the slaves
        # won't be healthy)
        shard_0_master.wait_for_vttablet_state('SERVING')
        shard_0_replica.wait_for_vttablet_state('NOT_SERVING')
        shard_0_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
        shard_1_master.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')
        shard_1_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
        shard_1_rdonly1.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/-80', shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-', shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # check the shards
        shards = utils.run_vtctl_json(
            ['FindAllShardsInKeyspace', 'test_keyspace'])
        self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards))
        self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards))
        self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards))

        # create the tables
        self._create_schema()
        self._insert_startup_values()
        self._test_keyrange_constraints()

        # run a health check on source replicas so they respond to discovery
        # (for binlog players) and on the source rdonlys (for workers)
        for t in [shard_0_replica, shard_1_slave1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
        for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

        # create the split shards
        shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
        shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0')
        shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0')
        shard_2_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-c0')
        shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
        shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-')
        shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_2_master.start_vttablet(wait_for_state=None)
        shard_3_master.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_replica1, shard_2_replica2, shard_2_rdonly1,
                shard_3_replica, shard_3_rdonly1
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_2_rdonly1, shard_3_master, shard_3_replica,
                shard_3_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/80-c0',
            shard_2_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', 'test_keyspace/c0-', shard_3_master.tablet_alias
        ],
                        auto_log=True)

        # check the shards
        shards = utils.run_vtctl_json(
            ['FindAllShardsInKeyspace', 'test_keyspace'])
        for s in ['-80', '80-', '80-c0', 'c0-']:
            self.assertIn(s, shards, 'unexpected shards: %s' % str(shards))
        self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards))

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # disable shard_1_slave2, so we're sure filtered replication will
        # stream from shard_1_slave1
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')

        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_1_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)

        # Run vtworker as daemon for the following SplitClone commands.
        worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
            ['--cell', 'test_nj', '--command_display_interval', '10ms'],
            auto_log=True)
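        # vtworker stays resident, listening on worker_rpc_port; each command
        # below is submitted through a short-lived vtworkerclient process,
        # and the worker has to be 'Reset' before it accepts the next one.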

        # Copy the data from the source to the destination shards.
        # --max_tps is only specified to enable the throttler and ensure that the
        # code is executed. But the intent here is not to throttle the test, hence
        # the rate limit is set very high.
        #
        # Initial clone (online).
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 2, 0, 0, 0)
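        # The four counters appear to be the per-table rows the worker
        # reconciled: inserts, updates, deletes and rows found equal (the
        # initial online clone inserts the two startup rows, nothing else).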

        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Test the correct handling of keyspace_id changes which happen after
        # the first clone.
        # Let row 2 go to shard 3 instead of shard 2.
        shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set'
                              ' custom_ksid_col=0xD000000000000000 WHERE id=2',
                              write=True)
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        # Row 2 will be deleted from shard 2 and inserted to shard 3.
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 1, 0, 1, 1)
        self._check_value(shard_2_master,
                          'resharding1',
                          2,
                          'msg2',
                          0xD000000000000000,
                          should_be_here=False)
        self._check_value(shard_3_master, 'resharding1', 2, 'msg2',
                          0xD000000000000000)
        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Move row 2 back to shard 2 from shard 3 by changing the keyspace_id again.
        shard_1_master.mquery('vt_test_keyspace', 'update resharding1 set'
                              ' custom_ksid_col=0x9000000000000000 WHERE id=2',
                              write=True)
        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--offline=false', '--exclude_tables', 'unrelated',
            '--chunk_count', '10', '--min_rows_per_chunk', '1',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        # Row 2 will be deleted from shard 3 and inserted to shard 2.
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 1, 0, 1, 1)
        self._check_value(shard_2_master, 'resharding1', 2, 'msg2',
                          0x9000000000000000)
        self._check_value(shard_3_master,
                          'resharding1',
                          2,
                          'msg2',
                          0x9000000000000000,
                          should_be_here=False)
        # Reset vtworker such that we can run the next command.
        workerclient_proc = utils.run_vtworker_client_bg(['Reset'],
                                                         worker_rpc_port)
        utils.wait_procs([workerclient_proc])

        # Modify the destination shard. SplitClone will revert the changes.
        # Delete row 2 (provokes an insert).
        shard_2_master.mquery('vt_test_keyspace',
                              'delete from resharding1 where id=2',
                              write=True)
        # Update row 3 (provokes an update).
        shard_3_master.mquery(
            'vt_test_keyspace',
            "update resharding1 set msg='msg-not-3' where id=3",
            write=True)
        # Insert rows 4 and 5 (provokes deletes).
        self._insert_value(shard_3_master, 'resharding1', 4, 'msg4',
                           0xD000000000000000)
        self._insert_value(shard_3_master, 'resharding1', 5, 'msg5',
                           0xD000000000000000)

        workerclient_proc = utils.run_vtworker_client_bg([
            'SplitClone', '--exclude_tables', 'unrelated', '--chunk_count',
            '10', '--min_rows_per_chunk', '1', '--min_healthy_rdonly_tablets',
            '1', '--max_tps', '9999', 'test_keyspace/80-'
        ], worker_rpc_port)
        utils.wait_procs([workerclient_proc])
        # Change the tablet, which was taken offline, back to rdonly.
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        self.verify_reconciliation_counters(worker_port, 'Online',
                                            'resharding1', 1, 1, 2, 0)
        self.verify_reconciliation_counters(worker_port, 'Offline',
                                            'resharding1', 0, 0, 0, 2)
        # Terminate worker daemon because it is no longer needed.
        utils.kill_sub_process(worker_proc, soft=True)

        # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl([
            'ValidateSchemaKeyspace', '--exclude_tables=unrelated',
            'test_keyspace'
        ],
                        auto_log=True)

        # check the binlog players are running and exporting vars
        self.check_destination_master(shard_2_master, ['test_keyspace/80-'])
        self.check_destination_master(shard_3_master, ['test_keyspace/80-'])

        # check that binlog server exported the stats vars
        self.check_binlog_server_vars(shard_1_slave1, horizontal=True)

        # Check that the throttler was enabled.
        self.check_throttler_service(shard_2_master.rpc_endpoint(),
                                     ['BinlogPlayer/0'], 9999)
        self.check_throttler_service(shard_3_master.rpc_endpoint(),
                                     ['BinlogPlayer/0'], 9999)
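        # --max_tps=9999 enabled a throttler on each destination binlog
        # player; this checks that the throttler RPC reports a
        # 'BinlogPlayer/0' module configured with that (deliberately high)
        # rate.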

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 5)
        if v != 100:
            # small optimization: only do this check if we don't have all the data
            # already anyway.
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 20)
        logging.debug('Checking no data was sent the wrong way')
        self._check_lots_not_present(1000)
        self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'],
                                      seconds_behind_master_max=30)
        self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'],
                                      seconds_behind_master_max=30)
        self.check_binlog_server_vars(shard_1_slave1,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)

        # use vtworker to compare the data (after health-checking the destination
        # rdonly tablets so discovery works)
        utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias])
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause('Good time to test vtworker for diffs')

        # get status for destination master tablets, make sure we have it all
        self.check_running_binlog_player(shard_2_master, 4000, 2000)
        self.check_running_binlog_player(shard_3_master, 4000, 2000)

        # start threads to insert data into shard_1 in the background,
        # stamped with the current time, and monitor the replication delay
        insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 1, 10000,
                                       0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 2, 10001,
                                       0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low', 1)
        monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high', 2)

        # tests a failover switching serving to a different replica
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
        utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias])

        # test data goes through again
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000, base=1000)
        logging.debug('Checking 80 percent of data was sent quickly')
        self._check_lots_timeout(1000, 80, 5, base=1000)
        self.check_binlog_server_vars(shard_1_slave2,
                                      horizontal=True,
                                      min_statements=800,
                                      min_transactions=800)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        expect_fail=True)

        # check query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even the health check that is enabled on
        # master 3 should not interfere (we run it to be sure).
        utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias],
                        auto_log=True)
        for master in [shard_2_master, shard_3_master]:
            utils.check_tablet_query_service(self, master, False, False)
            stream_health = utils.run_vtctl_json(
                ['VtTabletStreamHealth', '-count', '1', master.tablet_alias])
            logging.debug('Got health: %s', str(stream_health))
            self.assertIn('realtime_stats', stream_health)
            self.assertNotIn('serving', stream_health)
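        # Each streamed record should look roughly like this (an
        # illustrative shape, not exact output):
        #
        #   {
        #     'tablet_alias': {'cell': 'test_nj', 'uid': ...},
        #     'realtime_stats': {...}
        #     # no 'serving' key while filtered replication is running
        #   }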

        # check the destination master 3 is healthy, even though its query
        # service is not running (if it were not healthy, this would raise
        # an exception)

        # now serve rdonly from the split shards, in test_nj only
        utils.run_vtctl([
            'MigrateServedTypes', '--cells=test_nj', 'test_keyspace/80-',
            'rdonly'
        ],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_srv_keyspace(
            'test_ny',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # now serve rdonly from the split shards, everywhere
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_srv_keyspace(
            'test_ny',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # then serve replica from the split shards
        destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-c0 c0-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
            auto_log=True)
        # After a backwards migration, queryservice should be enabled on
        # source and disabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, True, False)
        # Destination tablets would have query service disabled for reasons
        # other than the migration, so check the shard record instead of
        # the tablets directly.
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, False)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)
        # Destination tablets would have query service disabled for reasons
        # other than the migration, so check the shard record instead of
        # the tablets directly.
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-c0 c0-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl([
            'PlannedReparentShard', 'test_keyspace/80-c0',
            shard_2_replica1.tablet_alias
        ])

        # update our test variables to point at the new master
        shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master

        logging.debug(
            'Inserting lots of data on source shard after reparenting')
        self._insert_lots(3000, base=2000)
        logging.debug('Checking 80 percent of data was sent fairly quickly')
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use vtworker to compare the data again
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug(
            'DELAY 1: %s max_lag=%d ms avg_lag=%d ms',
            monitor_thread_1.thread_name, monitor_thread_1.max_lag_ms,
            monitor_thread_1.lag_sum_ms / monitor_thread_1.sample_count)
        logging.debug(
            'DELAY 2: %s max_lag=%d ms avg_lag=%d ms',
            monitor_thread_2.thread_name, monitor_thread_2.max_lag_ms,
            monitor_thread_2.lag_sum_ms / monitor_thread_2.sample_count)
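        # The max/avg figures summarize how far each destination replica
        # lagged behind the timestamped inserts while the threads were
        # running.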

        # manipulate the SourceShard records to test 'vtctl SourceShardDelete'
        # and 'vtctl SourceShardAdd'
        utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                        auto_log=True)
        utils.run_vtctl([
            'SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-', '0',
            'test_keyspace/80-'
        ],
                        auto_log=True)
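        # The delete/add pair should round-trip cleanly: shard c0- must end
        # up with the same SourceShard record it started with, or the master
        # migration below would not tear filtered replication down properly.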

        # then serve master from the split shards, make sure the source master's
        # query service is now turned off
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace', 'Partitions(master): -80 80-c0 c0-\n'
            'Partitions(rdonly): -80 80-c0 c0-\n'
            'Partitions(replica): -80 80-c0 c0-\n',
            keyspace_id_type=base_sharding.keyspace_id_type,
            sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_1_master, False, True)

        # check the binlog players are gone now
        self.check_no_binlog_player(shard_2_master)
        self.check_no_binlog_player(shard_3_master)

        # delete the original tablets in the original shard
        tablet.kill_tablets([
            shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
            shard_1_rdonly1
        ])
        for t in [
                shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
                shard_1_rdonly1
        ]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        utils.run_vtctl(
            ['DeleteTablet', '-allow_master', shard_1_master.tablet_alias],
            auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                        auto_log=True,
                        expect_fail=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                        auto_log=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                        auto_log=True)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
        self.assertNotIn('cells', shard)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

        # kill everything
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_2_master,
            shard_2_replica1, shard_2_replica2, shard_2_rdonly1,
            shard_3_master, shard_3_replica, shard_3_rdonly1
        ])
Example #36
0
  def test_resharding(self):
    # we're going to reparent and swap these two
    global shard_2_master, shard_2_replica1

    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'bad_column',
                     '--sharding_column_type', 'bytes',
                     'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                     'custom_ksid_col', 'uint64'], expect_fail=True)
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force',
                     'test_keyspace',
                     'custom_ksid_col', base_sharding.keyspace_id_type])

    shard_0_master.init_tablet('master', 'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
    shard_1_master.init_tablet('master', 'test_keyspace', '80-')
    shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
    shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

    # we set full_mycnf_args to True as a test in the KIT_BYTES case
    full_mycnf_args = (base_sharding.keyspace_id_type ==
                       keyrange_constants.KIT_BYTES)

    # create databases so vttablet can start behaving somewhat normally
    for t in [shard_0_master, shard_0_replica, shard_0_ny_rdonly,
              shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
              shard_1_rdonly1]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args)

    # wait for the tablets (replication is not set up, so the slaves won't
    # be healthy)
    shard_0_master.wait_for_vttablet_state('SERVING')
    shard_0_replica.wait_for_vttablet_state('NOT_SERVING')
    shard_0_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
    shard_1_master.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
    shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')
    shard_1_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
    shard_1_rdonly1.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    # check the shards
    shards = utils.run_vtctl_json(['FindAllShardsInKeyspace', 'test_keyspace'])
    self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards))
    self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards))
    self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards))

    # create the tables
    self._create_schema()
    self._insert_startup_values()
    self._test_keyrange_constraints()

    # run a health check on source replicas so they respond to discovery
    # (for binlog players) and on the source rdonlys (for workers)
    for t in [shard_0_replica, shard_1_slave1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
    for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

    # create the split shards
    shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
    shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0')
    shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0')
    shard_2_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-c0')
    shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
    shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-')
    shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    shard_2_master.start_vttablet(wait_for_state=None)
    shard_3_master.start_vttablet(wait_for_state=None)
    for t in [shard_2_replica1, shard_2_replica2, shard_2_rdonly1,
              shard_3_replica, shard_3_rdonly1]:
      t.start_vttablet(wait_for_state=None)
    for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
              shard_2_rdonly1,
              shard_3_master, shard_3_replica, shard_3_rdonly1]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-c0',
                     shard_2_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/c0-',
                     shard_3_master.tablet_alias], auto_log=True)

    # check the shards
    shards = utils.run_vtctl_json(['FindAllShardsInKeyspace', 'test_keyspace'])
    for s in ['-80', '80-', '80-c0', 'c0-']:
      self.assertIn(s, shards, 'unexpected shards: %s' % str(shards))
    self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards))

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    # disable shard_1_slave2, so we're sure filtered replication will
    # stream from shard_1_slave1
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare'])
    shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')

    # we need to create the schema, and the worker will do data copying
    for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
      utils.run_vtctl(['CopySchemaShard', '--exclude_tables', 'unrelated',
                       shard_1_rdonly1.tablet_alias, keyspace_shard],
                      auto_log=True)

    # Run vtworker as daemon for the following SplitClone commands.
    worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
        ['--cell', 'test_nj', '--command_display_interval', '10ms'],
        auto_log=True)

    # Copy the data from the source to the destination shards.
    # min_table_size_for_split is set to 1 so as to force a split even on the
    # small table we have.
    # --max_tps is only specified to enable the throttler and ensure that the
    # code is executed. But the intent here is not to throttle the test, hence
    # the rate limit is set very high.
    #
    # Initial clone (online).
    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--offline=false',
         '--exclude_tables', 'unrelated',
         '--min_table_size_for_split', '1',
         '--min_healthy_rdonly_tablets', '1',
         '--max_tps', '9999',
         'test_keyspace/80-'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        2, 0, 0)

    # Reset vtworker such that we can run the next command.
    workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
    utils.wait_procs([workerclient_proc])

    # Modify the destination shard. SplitClone will revert the changes.
    # Delete row 2 (provokes an insert).
    shard_2_master.mquery('vt_test_keyspace',
                          'delete from resharding1 where id=2', write=True)
    # Update row 3 (provokes an update).
    shard_3_master.mquery('vt_test_keyspace',
                          "update resharding1 set msg='msg-not-3' where id=3",
                          write=True)
    # Insert row 4 (provokes a delete).
    self._insert_value(shard_3_master, 'resharding1', 4, 'msg4',
                       0xD000000000000000)

    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--exclude_tables', 'unrelated',
         '--min_table_size_for_split', '1',
         '--min_healthy_rdonly_tablets', '1',
         '--max_tps', '9999',
         'test_keyspace/80-'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    # Change the tablet, which was taken offline, back to rdonly.
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias,
                     'rdonly'], auto_log=True)
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        1, 1, 1)
    self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1',
                                        0, 0, 0)
    # Terminate worker daemon because it is no longer needed.
    utils.kill_sub_process(worker_proc, soft=True)
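    # soft=True requests a graceful shutdown (SIGTERM rather than SIGKILL)
    # so the worker can clean up before exiting.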

    # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', '--exclude_tables=unrelated',
                     'test_keyspace'], auto_log=True)

    # check the binlog players are running and exporting vars
    self.check_destination_master(shard_2_master, ['test_keyspace/80-'])
    self.check_destination_master(shard_3_master, ['test_keyspace/80-'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(shard_1_slave1, horizontal=True)

    # Check that the throttler was enabled.
    self.check_binlog_throttler(shard_2_master.rpc_endpoint(),
                                ['BinlogPlayer/0'], 9999)
    self.check_binlog_throttler(shard_3_master.rpc_endpoint(),
                                ['BinlogPlayer/0'], 9999)

    # testing filtered replication: insert a bunch of data on shard 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 5)
    if v != 100:
      # small optimization: only do this check if we don't have all the data
      # already anyway.
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 20)
    logging.debug('Checking no data was sent the wrong way')
    self._check_lots_not_present(1000)
    self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'],
                                  seconds_behind_master_max=30)
    self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(shard_1_slave1, horizontal=True,
                                  min_statements=1000, min_transactions=1000)

    # use vtworker to compare the data (after health-checking the destination
    # rdonly tablets so discovery works)
    utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias])
    logging.debug('Running vtworker SplitDiff')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        'test_keyspace/c0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    utils.pause('Good time to test vtworker for diffs')

    # get status for destination master tablets, make sure we have it all
    self.check_running_binlog_player(shard_2_master, 4000, 2000)
    self.check_running_binlog_player(shard_3_master, 4000, 2000)

    # start threads to insert data into shard_1 in the background,
    # stamped with the current time, and monitor the replication delay
    insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 1, 10000,
                                   0x9000000000000000)
    insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 2, 10001,
                                   0xD000000000000000)
    monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low', 1)
    monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high', 2)

    # tests a failover switching serving to a different replica
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
    shard_1_slave2.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
    utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias])

    # test data goes through again
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000, base=1000)
    logging.debug('Checking 80 percent of data was sent quickly')
    self._check_lots_timeout(1000, 80, 5, base=1000)
    self.check_binlog_server_vars(shard_1_slave2, horizontal=True,
                                  min_statements=800, min_transactions=800)

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    expect_fail=True)

    # check query service is off on master 2 and master 3, as filtered
    # replication is enabled. Even the health check that is enabled on
    # master 3 should not interfere (we run it to be sure).
    utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias],
                    auto_log=True)
    for master in [shard_2_master, shard_3_master]:
      utils.check_tablet_query_service(self, master, False, False)
      stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                            '-count', '1',
                                            master.tablet_alias])
      logging.debug('Got health: %s', str(stream_health))
      self.assertIn('realtime_stats', stream_health)
      self.assertNotIn('serving', stream_health)

    # check the destination master 3 is healthy, even though its query
    # service is not running (if it were not healthy, this would raise
    # an exception)

    # now serve rdonly from the split shards, in test_nj only
    utils.run_vtctl(['MigrateServedTypes', '--cells=test_nj',
                     'test_keyspace/80-', 'rdonly'], auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_srv_keyspace('test_ny', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # now serve rdonly from the split shards, everywhere
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_srv_keyspace('test_ny', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # then serve replica from the split shards
    destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)

    # move replica back and forth
    utils.run_vtctl(
        ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
        auto_log=True)
    # After a backwards migration, queryservice should be enabled on
    # source and disabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, True, False)
    # Destination tablets would have query service disabled for reasons
    # other than the migration, so check the shard record instead of
    # the tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, False)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    # After a forwards migration, queryservice should be disabled on
    # source and enabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)
    # Destination tablets would have query service disabled for reasons
    # other than the migration, so check the shard record instead of
    # the tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # reparent shard_2 to shard_2_replica1, then insert more data and
    # see it flow through still
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/80-c0',
                     shard_2_replica1.tablet_alias])

    # update our test variables to point at the new master
    shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master

    logging.debug('Inserting lots of data on source shard after reparenting')
    self._insert_lots(3000, base=2000)
    logging.debug('Checking 80 percent of data was sent fairly quickly')
    self._check_lots_timeout(3000, 80, 10, base=2000)

    # use vtworker to compare the data again
    logging.debug('Running vtworker SplitDiff')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        'test_keyspace/c0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    # going to migrate the master now, check the delays
    monitor_thread_1.done = True
    monitor_thread_2.done = True
    insert_thread_1.done = True
    insert_thread_2.done = True
    logging.debug('DELAY 1: %s max_lag=%d ms avg_lag=%d ms',
                  monitor_thread_1.thread_name,
                  monitor_thread_1.max_lag_ms,
                  monitor_thread_1.lag_sum_ms / monitor_thread_1.sample_count)
    logging.debug('DELAY 2: %s max_lag=%d ms avg_lag=%d ms',
                  monitor_thread_2.thread_name,
                  monitor_thread_2.max_lag_ms,
                  monitor_thread_2.lag_sum_ms / monitor_thread_2.sample_count)

    # manipulate the SourceShard records to test 'vtctl SourceShardDelete'
    # and 'vtctl SourceShardAdd'
    utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                    auto_log=True)
    utils.run_vtctl(['SourceShardAdd', '--key_range=80-',
                     'test_keyspace/c0-', '0', 'test_keyspace/80-'],
                    auto_log=True)

    # then serve master from the split shards, make sure the source master's
    # query service is now turned off
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-c0 c0-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_2_master)
    self.check_no_binlog_player(shard_3_master)

    # delete the original tablets in the original shard
    tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
                         shard_1_ny_rdonly, shard_1_rdonly1])
    for t in [shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
              shard_1_rdonly1]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    utils.run_vtctl(['DeleteTablet', '-allow_master',
                     shard_1_master.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(
        ['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # test RemoveShardCell
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True,
        expect_fail=True)
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True)
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True)
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
    self.assertNotIn('cells', shard)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

    # kill everything
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                         shard_2_master, shard_2_replica1, shard_2_replica2,
                         shard_2_rdonly1,
                         shard_3_master, shard_3_replica, shard_3_rdonly1])
Example #37
0
    def test_resharding(self):
        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'bad_column',
            '--sharding_column_type', 'bytes', '--split_shard_count', '2',
            'test_keyspace'
        ])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'keyspace_id', 'uint64'
        ],
                        expect_fail=True)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', '-split_shard_count', '4',
            'test_keyspace', 'keyspace_id', keyspace_id_type
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
        shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
        shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        ks = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
        self.assertEqual(ks['SplitShardCount'], 4)

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

        # create databases so vttablet can start behaving normally
        for t in [
                shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             full_mycnf_args=full_mycnf_args)

        # wait for the tablets
        shard_0_master.wait_for_vttablet_state('SERVING')
        shard_0_replica.wait_for_vttablet_state('SERVING')
        shard_0_ny_rdonly.wait_for_vttablet_state('SERVING')
        shard_1_master.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('SERVING')
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
        shard_1_ny_rdonly.wait_for_vttablet_state('SERVING')
        shard_1_rdonly.wait_for_vttablet_state('SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # create the tables
        self._create_schema()
        self._insert_startup_values()
        self._test_keyrange_constraints()

        # create the split shards
        shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
        shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-c0')
        shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-c0')
        shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
        shard_3_replica.init_tablet('spare', 'test_keyspace', 'c0-')
        shard_3_rdonly.init_tablet('rdonly', 'test_keyspace', 'c0-')

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_3_master.start_vttablet(wait_for_state=None,
                                      target_tablet_type='replica')
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_replica, shard_3_rdonly
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_master, shard_3_replica, shard_3_rdonly
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-c0',
            shard_2_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/c0-',
            shard_3_master.tablet_alias
        ],
                        auto_log=True)

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        if use_clone_worker:
            # the worker will do everything. We test with source_reader_count=10
            # (down from the default of 20) as the connection pool is not big
            # enough for 20. min_table_size_for_split is set to 1 so as to
            # force a split even on the small table we have.
            utils.run_vtworker([
                '--cell', 'test_nj', '--command_display_interval', '10ms',
                'SplitClone', '--exclude_tables', 'unrelated',
                '--strategy=-populate_blp_checkpoint -write_masters_only',
                '--source_reader_count', '10', '--min_table_size_for_split',
                '1', 'test_keyspace/80-c0'
            ],
                               auto_log=True)
            utils.run_vtctl(
                ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                auto_log=True)
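            # SplitClone takes the source rdonly tablet out of serving while
            # it copies (it changes the tablet type); the ChangeSlaveType
            # above puts it back to rdonly so it can serve reads again.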

            # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        else:
            # take the snapshot for the split
            utils.run_vtctl([
                'MultiSnapshot', '--spec=80-c0-', '--exclude_tables=unrelated',
                shard_1_slave1.tablet_alias
            ],
                            auto_log=True)

            # the snapshot_copy hook will copy the snapshot files to
            # VTDATAROOT/tmp/... as a test. We want to use these for one half,
            # but not for the other, so we test both scenarios.
            os.unlink(
                os.path.join(
                    environment.tmproot, "snapshot-from-%s-for-%s.tar" %
                    (shard_1_slave1.tablet_alias, "80-c0")))

            # wait for tablet's binlog server service to be enabled after snapshot
            shard_1_slave1.wait_for_binlog_server_state("Enabled")

            # perform the restores: first one from source tablet. We removed the
            # storage backup, so it's coming from the tablet itself.
            # we also delay starting the binlog player, then enable it.
            utils.run_vtctl([
                'ShardMultiRestore',
                '-strategy=-populate_blp_checkpoint -dont_start_binlog_player',
                'test_keyspace/80-c0', shard_1_slave1.tablet_alias
            ],
                            auto_log=True)

            timeout = 10
            while True:
                shard_2_master_status = shard_2_master.get_status()
                if not "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
                    timeout = utils.wait_step(
                        'shard 2 master has not failed starting yet', timeout)
                    continue
                logging.debug(
                    "shard 2 master is waiting on flag removal, good")
                break

            qr = utils.run_vtctl_json([
                'ExecuteFetch', shard_2_master.tablet_alias,
                'update _vt.blp_checkpoint set flags="" where source_shard_uid=0'
            ])
            self.assertEqual(qr['RowsAffected'], 1)

            timeout = 10
            while True:
                shard_2_master_status = shard_2_master.get_status()
                if "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
                    timeout = utils.wait_step(
                        'shard 2 master has not started replication yet',
                        timeout)
                    continue
                logging.debug("shard 2 master has started replication, good")
                break
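            # utils.wait_step (assumed semantics, inferred from its use here)
            # sleeps briefly and returns the remaining timeout, raising once
            # it reaches zero, so both polling loops are bounded at ~10s.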

            # second restore from storage: to be sure, we stop vttablet, and restart
            # it afterwards
            shard_1_slave1.kill_vttablet()
            utils.run_vtctl([
                'ShardMultiRestore', '-strategy=-populate_blp_checkpoint',
                'test_keyspace/c0-', shard_1_slave1.tablet_alias
            ],
                            auto_log=True)
            shard_1_slave1.start_vttablet(wait_for_state=None)
            shard_1_slave1.wait_for_binlog_server_state("Enabled")

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl([
            'ValidateSchemaKeyspace', '--exclude_tables=unrelated',
            'test_keyspace'
        ],
                        auto_log=True)

        # check the binlog players are running and exporting vars
        shard_2_master.wait_for_binlog_player_count(1)
        shard_3_master.wait_for_binlog_player_count(1)
        self._check_binlog_player_vars(shard_2_master)
        self._check_binlog_player_vars(shard_3_master)

        # check that binlog server exported the stats vars
        self._check_binlog_server_vars(shard_1_slave1)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000)
        logging.debug("Checking 80 percent of data is sent quickly")
        self._check_lots_timeout(1000, 80, 5)
        logging.debug("Checking all data goes through eventually")
        self._check_lots_timeout(1000, 100, 20)
        logging.debug("Checking no data was sent the wrong way")
        self._check_lots_not_present(1000)
        self._check_binlog_player_vars(shard_2_master,
                                       seconds_behind_master_max=30)
        self._check_binlog_player_vars(shard_3_master,
                                       seconds_behind_master_max=30)
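        # a rough sketch of what the lag check above presumably does; the
        # exported var name is an assumption:
        #   v = utils.get_vars(shard_2_master.port)
        #   assert v['BinlogPlayerSecondsBehindMaster'] <= 30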

        # use the vtworker checker to compare the data
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause("Good time to test vtworker for diffs")

        # get status for a destination master tablet, make sure we have it all
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('Binlog player state: Running', shard_2_master_status)
        self.assertIn(
            '<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>',
            shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # start a thread to insert data into shard_1 in the background
        # with current time, and monitor the delay
        insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000,
                                       0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001,
                                       0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
        monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")
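        # illustrative sanity check (not in the original test): the two
        # insert threads use keyspace ids on opposite sides of the c0
        # boundary, so their rows land on different destination shards.
        for kid, want in [(0x9000000000000000, '80-c0'),
                          (0xD000000000000000, 'c0-')]:
            dest = '80-c0' if kid < 0xC000000000000000 else 'c0-'
            self.assertEqual(dest, want)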

        # tests a failover switching serving to a different replica
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')

        # test data goes through again
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000, base=1000)
        logging.debug("Checking 80 percent of data was sent quickly")
        self._check_lots_timeout(1000, 80, 5, base=1000)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        expect_fail=True)

        # check query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even the health check that is enabled on
        # master 3 should not interfere.
        self._check_query_service(shard_2_master, False, False)
        self._check_query_service(shard_3_master, False, False)

        # now serve rdonly from the split shards, in test_nj only
        utils.run_vtctl([
            'MigrateServedTypes', '--cells=test_nj', 'test_keyspace/80-',
            'rdonly'
        ],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_srv_keyspace('test_ny',
                                 'test_keyspace',
                                 'Partitions(rdonly): -80 80-\n' +
                                 'TabletTypes: rdonly',
                                 keyspace_id_type=keyspace_id_type)
        self._check_query_service(shard_0_ny_rdonly, True, False)
        self._check_query_service(shard_1_ny_rdonly, True, False)
        self._check_query_service(shard_1_rdonly, False, True)

        # now serve rdonly from the split shards, everywhere
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        utils.check_srv_keyspace('test_ny',
                                 'test_keyspace',
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'TabletTypes: rdonly',
                                 keyspace_id_type=keyspace_id_type)
        self._check_query_service(shard_0_ny_rdonly, True, False)
        self._check_query_service(shard_1_ny_rdonly, False, True)
        self._check_query_service(shard_1_rdonly, False, True)

        # then serve replica from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-c0 c0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        self._check_query_service(shard_1_slave2, False, True)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
            auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        self._check_query_service(shard_1_slave2, True, False)
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-c0 c0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        self._check_query_service(shard_1_slave2, False, True)

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl([
            'ReparentShard', 'test_keyspace/80-c0',
            shard_2_replica1.tablet_alias
        ])
        logging.debug(
            "Inserting lots of data on source shard after reparenting")
        self._insert_lots(3000, base=2000)
        logging.debug("Checking 80 percent of data was sent fairly quickly")
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use the vtworker checker to compare the data again
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/c0-'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u",
                      monitor_thread_1.object_name, monitor_thread_1.max_lag,
                      monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
        logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u",
                      monitor_thread_2.object_name, monitor_thread_2.max_lag,
                      monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

        # mess with the SourceShard records to test 'vtctl SourceShardDelete'
        # and 'vtctl SourceShardAdd'
        utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                        auto_log=True)
        utils.run_vtctl([
            'SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-', '0',
            'test_keyspace/80-'
        ],
                        auto_log=True)

        # then serve master from the split shards, make sure the source master's
        # query service is now turned off
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-c0 c0-\n' +
                                 'Partitions(rdonly): -80 80-c0 c0-\n' +
                                 'Partitions(replica): -80 80-c0 c0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        self._check_query_service(shard_1_master, False, True)

        # check the binlog players are gone now
        shard_2_master.wait_for_binlog_player_count(0)
        shard_3_master.wait_for_binlog_player_count(0)

        # get status for a destination master tablet, make sure it's good
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('No binlog player is running', shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # scrap the original tablets in the original shard
        for t in [
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly
        ]:
            utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
        tablet.kill_tablets([
            shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
            shard_1_rdonly
        ])
        for t in [
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly
        ]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                        auto_log=True,
                        expect_fail=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                        auto_log=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                        auto_log=True)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
        if shard['Cells']:
            self.fail("Non-empty Cells record for shard: %s" % str(shard))

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

        # kill everything
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_2_master,
            shard_2_replica1, shard_2_replica2, shard_3_master,
            shard_3_replica, shard_3_rdonly
        ])
Example #38
0
  def test_resharding(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'bad_column',
                     '--sharding_column_type', 'bytes',
                     'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                     'keyspace_id', 'uint64'], expect_fail=True)
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                     'keyspace_id', keyspace_id_type])

    shard_0_master.init_tablet( 'master',  'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_ny_slave.init_tablet('spare', 'test_keyspace', '-80')
    shard_1_master.init_tablet( 'master',  'test_keyspace', '80-')
    shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
    shard_1_ny_slave.init_tablet('spare', 'test_keyspace', '80-')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # we set full_mycnf_args to True as a test in the KIT_BYTES case
    full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

    # create databases so vttablet can start behaving normally
    for t in [shard_0_master, shard_0_replica, shard_0_ny_slave, shard_1_master,
              shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args)

    # wait for the tablets
    shard_0_master.wait_for_vttablet_state('SERVING')
    shard_0_replica.wait_for_vttablet_state('SERVING')
    shard_0_ny_slave.wait_for_vttablet_state('NOT_SERVING') # spare
    shard_1_master.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('SERVING')
    shard_1_slave2.wait_for_vttablet_state('NOT_SERVING') # spare
    shard_1_ny_slave.wait_for_vttablet_state('NOT_SERVING') # spare
    shard_1_rdonly.wait_for_vttablet_state('SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    # create the tables
    self._create_schema()
    self._insert_startup_values()
    self._test_keyrange_constraints()

    # create the split shards
    shard_2_master.init_tablet(  'master', 'test_keyspace', '80-C0')
    shard_2_replica1.init_tablet('spare',  'test_keyspace', '80-C0')
    shard_2_replica2.init_tablet('spare',  'test_keyspace', '80-C0')
    shard_3_master.init_tablet(  'master', 'test_keyspace', 'C0-')
    shard_3_replica.init_tablet( 'spare',  'test_keyspace', 'C0-')
    shard_3_rdonly.init_tablet(  'rdonly', 'test_keyspace', 'C0-')

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    shard_3_master.start_vttablet(wait_for_state=None,
                                  target_tablet_type='replica')
    for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
              shard_3_replica, shard_3_rdonly]:
      t.start_vttablet(wait_for_state=None)
    for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
              shard_3_master, shard_3_replica, shard_3_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-C0',
                     shard_2_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/C0-',
                     shard_3_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # take the snapshot for the split
    utils.run_vtctl(['MultiSnapshot', '--spec=80-C0-',
                     '--exclude_tables=unrelated',
                     shard_1_slave1.tablet_alias], auto_log=True)
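    # --spec=80-C0- lists the split points: it cuts the source range 80- at
    # C0, yielding the two halves 80-C0 and C0- that match the destination
    # shards created above.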

    # the snapshot_copy hook will copy the snapshot files to
    # VTDATAROOT/tmp/... as a test. We want to use these for one half,
    # but not for the other, so we test both scenarios.
    os.unlink(os.path.join(environment.tmproot, "snapshot-from-%s-for-%s.tar" %
                           (shard_1_slave1.tablet_alias, "80-C0")))

    # wait for tablet's binlog server service to be enabled after snapshot
    shard_1_slave1.wait_for_binlog_server_state("Enabled")

    # perform the restores: first one from source tablet. We removed the
    # storage backup, so it's coming from the tablet itself.
    # we also delay starting the binlog player, then enable it.
    utils.run_vtctl(['ShardMultiRestore',
                     '-strategy=populateBlpCheckpoint,dontStartBinlogPlayer',
                     'test_keyspace/80-C0', shard_1_slave1.tablet_alias],
                    auto_log=True)

    timeout = 10
    while True:
      shard_2_master_status = shard_2_master.get_status()
      if not "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
        timeout = utils.wait_step('shard 2 master has not failed starting yet', timeout)
        continue
      logging.debug("shard 2 master is waiting on flag removal, good")
      break

    qr = utils.run_vtctl_json(['ExecuteFetch', shard_2_master.tablet_alias, 'update _vt.blp_checkpoint set flags="" where source_shard_uid=0'])
    self.assertEqual(qr['RowsAffected'], 1)

    timeout = 10
    while True:
      shard_2_master_status = shard_2_master.get_status()
      if "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
        timeout = utils.wait_step('shard 2 master has not started replication yet', timeout)
        continue
      logging.debug("shard 2 master has started replication, good")
      break

    # second restore from storage: to be sure, we stop vttablet, and restart
    # it afterwards
    shard_1_slave1.kill_vttablet()
    utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                     'test_keyspace/C0-', shard_1_slave1.tablet_alias],
                    auto_log=True)
    shard_1_slave1.start_vttablet(wait_for_state=None)
    shard_1_slave1.wait_for_binlog_server_state("Enabled")

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', '--exclude_tables=unrelated',
                     'test_keyspace'], auto_log=True)

    # check the binlog players are running and exporting vars
    shard_2_master.wait_for_binlog_player_count(1)
    shard_3_master.wait_for_binlog_player_count(1)
    self._check_binlog_player_vars(shard_2_master)
    self._check_binlog_player_vars(shard_3_master)

    # check that binlog server exported the stats vars
    self._check_binlog_server_vars(shard_1_slave1)

    # testing filtered replication: insert a bunch of data on shard 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug("Inserting lots of data on source shard")
    self._insert_lots(1000)
    logging.debug("Checking 80 percent of data is sent quickly")
    self._check_lots_timeout(1000, 80, 5)
    logging.debug("Checking all data goes through eventually")
    self._check_lots_timeout(1000, 100, 20)
    logging.debug("Checking no data was sent the wrong way")
    self._check_lots_not_present(1000)
    self._check_binlog_player_vars(shard_2_master, seconds_behind_master_max=30)
    self._check_binlog_player_vars(shard_3_master, seconds_behind_master_max=30)

    # use the vtworker checker to compare the data
    logging.debug("Running vtworker SplitDiff")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)

    utils.pause("Good time to test vtworker for diffs")

    # get status for a destination master tablet, make sure we have it all
    shard_2_master_status = shard_2_master.get_status()
    self.assertIn('Binlog player state: Running', shard_2_master_status)
    self.assertIn('<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>', shard_2_master_status)
    self.assertIn('</html>', shard_2_master_status)

    # start a thread to insert data into shard_1 in the background
    # with current time, and monitor the delay
    insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000,
                                   0x9000000000000000)
    insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001,
                                   0xD000000000000000)
    monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
    monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")

    # tests a failover switching serving to a different replica
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
    shard_1_slave2.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')

    # test data goes through again
    logging.debug("Inserting lots of data on source shard")
    self._insert_lots(1000, base=1000)
    logging.debug("Checking 80 percent of data was sent quickly")
    self._check_lots_timeout(1000, 80, 5, base=1000)

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    expect_fail=True)

    # check query service is off on master 2 and master 3, as filtered
    # replication is enabled. Even the health check that is enabled on
    # master 3 should not interfere.
    shard_2_master_vars = utils.get_vars(shard_2_master.port)
    self.assertEqual(shard_2_master_vars['TabletStateName'], 'NOT_SERVING')
    shard_3_master_vars = utils.get_vars(shard_3_master.port)
    self.assertEqual(shard_3_master_vars['TabletStateName'], 'NOT_SERVING')

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # then serve replica from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-C0 C0-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # move replica back and forth
    utils.run_vtctl(['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-C0 C0-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # reparent shard_2 to shard_2_replica1, then insert more data and
    # see it flow through still
    utils.run_vtctl(['ReparentShard', 'test_keyspace/80-C0',
                    shard_2_replica1.tablet_alias])
    logging.debug("Inserting lots of data on source shard after reparenting")
    self._insert_lots(3000, base=2000)
    logging.debug("Checking 80 percent of data was sent fairly quickly")
    self._check_lots_timeout(3000, 80, 10, base=2000)

    # use the vtworker checker to compare the data again
    logging.debug("Running vtworker SplitDiff")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)

    # going to migrate the master now, check the delays
    monitor_thread_1.done = True
    monitor_thread_2.done = True
    insert_thread_1.done = True
    insert_thread_2.done = True
    logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u",
                  monitor_thread_1.object_name,
                  monitor_thread_1.max_lag,
                  monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
    logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u",
                  monitor_thread_2.object_name,
                  monitor_thread_2.max_lag,
                  monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

    # then serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-C0 C0-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-C0 C0-\n' +
                             'TabletTypes: master,rdonly,replica',
                             keyspace_id_type=keyspace_id_type)

    # check the binlog players are gone now
    shard_2_master.wait_for_binlog_player_count(0)
    shard_3_master.wait_for_binlog_player_count(0)

    # get status for a destination master tablet, make sure it's good
    shard_2_master_status = shard_2_master.get_status()
    self.assertIn('No binlog player is running', shard_2_master_status)
    self.assertIn('</html>', shard_2_master_status)

    # scrap the original tablets in the original shard
    for t in [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave,
              shard_1_rdonly]:
      utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
    tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
                         shard_1_ny_slave, shard_1_rdonly])
    for t in [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave,
              shard_1_rdonly]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # test RemoveShardCell
    utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True, expect_fail=True)
    utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True)
    utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True)
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
    if shard['Cells']:
      self.fail("Non-empty Cells record for shard: %s" % str(shard))

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

    # kill everything
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_ny_slave,
                         shard_2_master, shard_2_replica1, shard_2_replica2,
                         shard_3_master, shard_3_replica, shard_3_rdonly])
Example #39
0
    def test_resharding(self):
        utils.run_vtctl(
            [
                "CreateKeyspace",
                "--sharding_column_name",
                "bad_column",
                "--sharding_column_type",
                "bytes",
                "--split_shard_count",
                "2",
                "test_keyspace",
            ]
        )
        utils.run_vtctl(["SetKeyspaceShardingInfo", "test_keyspace", "keyspace_id", "uint64"], expect_fail=True)
        utils.run_vtctl(
            [
                "SetKeyspaceShardingInfo",
                "-force",
                "-split_shard_count",
                "4",
                "test_keyspace",
                "keyspace_id",
                keyspace_id_type,
            ]
        )

        shard_0_master.init_tablet("master", "test_keyspace", "-80")
        shard_0_replica.init_tablet("replica", "test_keyspace", "-80")
        shard_0_ny_slave.init_tablet("spare", "test_keyspace", "-80")
        shard_1_master.init_tablet("master", "test_keyspace", "80-")
        shard_1_slave1.init_tablet("replica", "test_keyspace", "80-")
        shard_1_slave2.init_tablet("spare", "test_keyspace", "80-")
        shard_1_ny_slave.init_tablet("spare", "test_keyspace", "80-")
        shard_1_rdonly.init_tablet("rdonly", "test_keyspace", "80-")

        utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)

        ks = utils.run_vtctl_json(["GetSrvKeyspace", "test_nj", "test_keyspace"])
        self.assertEqual(ks["SplitShardCount"], 4)

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

        # create databases so vttablet can start behaving normally
        for t in [
            shard_0_master,
            shard_0_replica,
            shard_0_ny_slave,
            shard_1_master,
            shard_1_slave1,
            shard_1_slave2,
            shard_1_ny_slave,
            shard_1_rdonly,
        ]:
            t.create_db("vt_test_keyspace")
            t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args)

        # wait for the tablets
        shard_0_master.wait_for_vttablet_state("SERVING")
        shard_0_replica.wait_for_vttablet_state("SERVING")
        shard_0_ny_slave.wait_for_vttablet_state("NOT_SERVING")  # spare
        shard_1_master.wait_for_vttablet_state("SERVING")
        shard_1_slave1.wait_for_vttablet_state("SERVING")
        shard_1_slave2.wait_for_vttablet_state("NOT_SERVING")  # spare
        shard_1_ny_slave.wait_for_vttablet_state("NOT_SERVING")  # spare
        shard_1_rdonly.wait_for_vttablet_state("SERVING")

        # reparent to make the tablets work
        utils.run_vtctl(["ReparentShard", "-force", "test_keyspace/-80", shard_0_master.tablet_alias], auto_log=True)
        utils.run_vtctl(["ReparentShard", "-force", "test_keyspace/80-", shard_1_master.tablet_alias], auto_log=True)

        # create the tables
        self._create_schema()
        self._insert_startup_values()
        self._test_keyrange_constraints()

        # create the split shards
        shard_2_master.init_tablet("master", "test_keyspace", "80-c0")
        shard_2_replica1.init_tablet("spare", "test_keyspace", "80-c0")
        shard_2_replica2.init_tablet("spare", "test_keyspace", "80-c0")
        shard_3_master.init_tablet("master", "test_keyspace", "c0-")
        shard_3_replica.init_tablet("spare", "test_keyspace", "c0-")
        shard_3_rdonly.init_tablet("rdonly", "test_keyspace", "c0-")

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_3_master.start_vttablet(wait_for_state=None, target_tablet_type="replica")
        for t in [shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_replica, shard_3_rdonly]:
            t.start_vttablet(wait_for_state=None)
        for t in [shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_master, shard_3_replica, shard_3_rdonly]:
            t.wait_for_vttablet_state("NOT_SERVING")

        utils.run_vtctl(["ReparentShard", "-force", "test_keyspace/80-c0", shard_2_master.tablet_alias], auto_log=True)
        utils.run_vtctl(["ReparentShard", "-force", "test_keyspace/c0-", shard_3_master.tablet_alias], auto_log=True)

        utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n"
            + "Partitions(rdonly): -80 80-\n"
            + "Partitions(replica): -80 80-\n"
            + "TabletTypes: master,rdonly,replica",
            keyspace_id_type=keyspace_id_type,
        )

        if use_clone_worker:
            # the worker will do everything. We test with source_reader_count=10
            # (down from the default of 20) as the connection pool is not big
            # enough for 20. min_table_size_for_split is set to 1 so as to
            # force a split even on the small table we have.
            utils.run_vtworker(
                [
                    "--cell",
                    "test_nj",
                    "--command_display_interval",
                    "10ms",
                    "SplitClone",
                    "--exclude_tables",
                    "unrelated",
                    "--strategy",
                    "populateBlpCheckpoint",
                    "--source_reader_count",
                    "10",
                    "--min_table_size_for_split",
                    "1",
                    "test_keyspace/80-c0",
                ],
                auto_log=True,
            )

            # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        else:
            # take the snapshot for the split
            utils.run_vtctl(
                ["MultiSnapshot", "--spec=80-c0-", "--exclude_tables=unrelated", shard_1_slave1.tablet_alias],
                auto_log=True,
            )

            # the snapshot_copy hook will copy the snapshot files to
            # VTDATAROOT/tmp/... as a test. We want to use these for one half,
            # but not for the other, so we test both scenarios.
            os.unlink(
                os.path.join(
                    environment.tmproot, "snapshot-from-%s-for-%s.tar" % (shard_1_slave1.tablet_alias, "80-c0")
                )
            )

            # wait for tablet's binlog server service to be enabled after snapshot
            shard_1_slave1.wait_for_binlog_server_state("Enabled")

            # perform the restores: first one from source tablet. We removed the
            # storage backup, so it's coming from the tablet itself.
            # we also delay starting the binlog player, then enable it.
            utils.run_vtctl(
                [
                    "ShardMultiRestore",
                    "-strategy=populateBlpCheckpoint,dontStartBinlogPlayer",
                    "test_keyspace/80-c0",
                    shard_1_slave1.tablet_alias,
                ],
                auto_log=True,
            )

            timeout = 10
            while True:
                shard_2_master_status = shard_2_master.get_status()
                if not "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
                    timeout = utils.wait_step("shard 2 master has not failed starting yet", timeout)
                    continue
                logging.debug("shard 2 master is waiting on flag removal, good")
                break

            qr = utils.run_vtctl_json(
                [
                    "ExecuteFetch",
                    shard_2_master.tablet_alias,
                    'update _vt.blp_checkpoint set flags="" where source_shard_uid=0',
                ]
            )
            self.assertEqual(qr["RowsAffected"], 1)

            timeout = 10
            while True:
                shard_2_master_status = shard_2_master.get_status()
                if "not starting because flag &#39;DontStart&#39; is set" in shard_2_master_status:
                    timeout = utils.wait_step("shard 2 master has not started replication yet", timeout)
                    continue
                logging.debug("shard 2 master has started replication, good")
                break

            # second restore from storage: to be sure, we stop vttablet, and restart
            # it afterwards
            shard_1_slave1.kill_vttablet()
            utils.run_vtctl(
                [
                    "ShardMultiRestore",
                    "-strategy=populateBlpCheckpoint",
                    "test_keyspace/c0-",
                    shard_1_slave1.tablet_alias,
                ],
                auto_log=True,
            )
            shard_1_slave1.start_vttablet(wait_for_state=None)
            shard_1_slave1.wait_for_binlog_server_state("Enabled")

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(["ValidateSchemaKeyspace", "--exclude_tables=unrelated", "test_keyspace"], auto_log=True)

        # check the binlog players are running and exporting vars
        shard_2_master.wait_for_binlog_player_count(1)
        shard_3_master.wait_for_binlog_player_count(1)
        self._check_binlog_player_vars(shard_2_master)
        self._check_binlog_player_vars(shard_3_master)

        # check that binlog server exported the stats vars
        self._check_binlog_server_vars(shard_1_slave1)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000)
        logging.debug("Checking 80 percent of data is sent quickly")
        self._check_lots_timeout(1000, 80, 5)
        logging.debug("Checking all data goes through eventually")
        self._check_lots_timeout(1000, 100, 20)
        logging.debug("Checking no data was sent the wrong way")
        self._check_lots_not_present(1000)
        self._check_binlog_player_vars(shard_2_master, seconds_behind_master_max=30)
        self._check_binlog_player_vars(shard_3_master, seconds_behind_master_max=30)

        # use the vtworker checker to compare the data
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(["-cell", "test_nj", "SplitDiff", "test_keyspace/c0-"], auto_log=True)
        utils.run_vtctl(["ChangeSlaveType", shard_1_rdonly.tablet_alias, "rdonly"], auto_log=True)
        utils.run_vtctl(["ChangeSlaveType", shard_3_rdonly.tablet_alias, "rdonly"], auto_log=True)

        utils.pause("Good time to test vtworker for diffs")

        # get status for a destination master tablet, make sure we have it all
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn("Binlog player state: Running", shard_2_master_status)
        self.assertIn(
            "<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>", shard_2_master_status
        )
        self.assertIn("</html>", shard_2_master_status)

        # start a thread to insert data into shard_1 in the background
        # with current time, and monitor the delay
        insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000, 0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001, 0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
        monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")

        # tests a failover switching serving to a different replica
        utils.run_vtctl(["ChangeSlaveType", shard_1_slave2.tablet_alias, "replica"])
        utils.run_vtctl(["ChangeSlaveType", shard_1_slave1.tablet_alias, "spare"])
        shard_1_slave2.wait_for_vttablet_state("SERVING")
        shard_1_slave1.wait_for_vttablet_state("NOT_SERVING")

        # test data goes through again
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000, base=1000)
        logging.debug("Checking 80 percent of data was sent quickly")
        self._check_lots_timeout(1000, 80, 5, base=1000)

        # check we can't migrate the master just yet
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "master"], expect_fail=True)

        # check query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even the health check that is enabled on
        # master 3 should not interfere.
        shard_2_master_vars = utils.get_vars(shard_2_master.port)
        self.assertEqual(shard_2_master_vars["TabletStateName"], "NOT_SERVING")
        shard_3_master_vars = utils.get_vars(shard_3_master.port)
        self.assertEqual(shard_3_master_vars["TabletStateName"], "NOT_SERVING")

        # now serve rdonly from the split shards
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "rdonly"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n"
            + "Partitions(rdonly): -80 80-c0 c0-\n"
            + "Partitions(replica): -80 80-\n"
            + "TabletTypes: master,rdonly,replica",
            keyspace_id_type=keyspace_id_type,
        )

        # then serve replica from the split shards
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "replica"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n"
            + "Partitions(rdonly): -80 80-c0 c0-\n"
            + "Partitions(replica): -80 80-c0 c0-\n"
            + "TabletTypes: master,rdonly,replica",
            keyspace_id_type=keyspace_id_type,
        )

        # move replica back and forth
        utils.run_vtctl(["MigrateServedTypes", "-reverse", "test_keyspace/80-", "replica"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n"
            + "Partitions(rdonly): -80 80-c0 c0-\n"
            + "Partitions(replica): -80 80-\n"
            + "TabletTypes: master,rdonly,replica",
            keyspace_id_type=keyspace_id_type,
        )
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "replica"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n"
            + "Partitions(rdonly): -80 80-c0 c0-\n"
            + "Partitions(replica): -80 80-c0 c0-\n"
            + "TabletTypes: master,rdonly,replica",
            keyspace_id_type=keyspace_id_type,
        )

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl(["ReparentShard", "test_keyspace/80-c0", shard_2_replica1.tablet_alias])
        logging.debug("Inserting lots of data on source shard after reparenting")
        self._insert_lots(3000, base=2000)
        logging.debug("Checking 80 percent of data was sent fairly quickly")
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use the vtworker checker to compare the data again
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(["-cell", "test_nj", "SplitDiff", "test_keyspace/c0-"], auto_log=True)
        utils.run_vtctl(["ChangeSlaveType", shard_1_rdonly.tablet_alias, "rdonly"], auto_log=True)
        utils.run_vtctl(["ChangeSlaveType", shard_3_rdonly.tablet_alias, "rdonly"], auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug(
            "DELAY 1: %s max_lag=%u avg_lag=%u",
            monitor_thread_1.object_name,
            monitor_thread_1.max_lag,
            monitor_thread_1.lag_sum / monitor_thread_1.sample_count,
        )
        logging.debug(
            "DELAY 2: %s max_lag=%u avg_lag=%u",
            monitor_thread_2.object_name,
            monitor_thread_2.max_lag,
            monitor_thread_2.lag_sum / monitor_thread_2.sample_count,
        )

        # then serve master from the split shards
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "master"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-c0 c0-\n"
            + "Partitions(rdonly): -80 80-c0 c0-\n"
            + "Partitions(replica): -80 80-c0 c0-\n"
            + "TabletTypes: master,rdonly,replica",
            keyspace_id_type=keyspace_id_type,
        )

        # check the binlog players are gone now
        shard_2_master.wait_for_binlog_player_count(0)
        shard_3_master.wait_for_binlog_player_count(0)

        # get status for a destination master tablet, make sure it's good
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn("No binlog player is running", shard_2_master_status)
        self.assertIn("</html>", shard_2_master_status)

        # scrap the original tablets in the original shard
        for t in [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly]:
            utils.run_vtctl(["ScrapTablet", t.tablet_alias], auto_log=True)
        tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly])
        for t in [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_slave, shard_1_rdonly]:
            utils.run_vtctl(["DeleteTablet", t.tablet_alias], auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)

        # test RemoveShardCell
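        # (the -80 variant is expected to fail because that shard still has
        # live tablets in cell test_nj; the 80- tablets were scrapped and
        # deleted above, so its cell entries can be removed)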
        utils.run_vtctl(["RemoveShardCell", "test_keyspace/-80", "test_nj"], auto_log=True, expect_fail=True)
        utils.run_vtctl(["RemoveShardCell", "test_keyspace/80-", "test_nj"], auto_log=True)
        utils.run_vtctl(["RemoveShardCell", "test_keyspace/80-", "test_ny"], auto_log=True)
        shard = utils.run_vtctl_json(["GetShard", "test_keyspace/80-"])
        if shard["Cells"]:
            self.fail("Non-empty Cells record for shard: %s" % str(shard))

        # delete the original shard
        utils.run_vtctl(["DeleteShard", "test_keyspace/80-"], auto_log=True)

        # kill everything
        tablet.kill_tablets(
            [
                shard_0_master,
                shard_0_replica,
                shard_0_ny_slave,
                shard_2_master,
                shard_2_replica1,
                shard_2_replica2,
                shard_3_master,
                shard_3_replica,
                shard_3_rdonly,
            ]
        )
Example #40
0
  def test_resharding(self):
    # create the keyspace with just one shard
    shard_master.init_tablet(
        'master',
        keyspace='test_keyspace',
        shard='0',
        tablet_index=0)
    shard_replica.init_tablet(
        'replica',
        keyspace='test_keyspace',
        shard='0',
        tablet_index=1)
    shard_rdonly1.init_tablet(
        'rdonly',
        keyspace='test_keyspace',
        shard='0',
        tablet_index=2)

    for t in [shard_master, shard_replica, shard_rdonly1]:
      t.create_db('vt_test_keyspace')

    shard_master.start_vttablet(wait_for_state=None)
    shard_replica.start_vttablet(wait_for_state=None)
    shard_rdonly1.start_vttablet(wait_for_state=None)

    shard_master.wait_for_vttablet_state('SERVING')
    for t in [shard_replica, shard_rdonly1]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     shard_master.tablet_alias], auto_log=True)

    utils.wait_for_tablet_type(shard_replica.tablet_alias, 'replica')
    utils.wait_for_tablet_type(shard_rdonly1.tablet_alias, 'rdonly')
    for t in [shard_master, shard_replica, shard_rdonly1]:
      t.wait_for_vttablet_state('SERVING')

    # create the tables and add startup values
    self._create_schema()
    self._insert_startup_values()

    # reload schema on all tablets so we can query them
    for t in [shard_master, shard_replica, shard_rdonly1]:
      utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True)

    # must start vtgate after tablets are up, or else wait until 1min refresh
    # we want cache_ttl at zero so we re-read the topology for every test query.
    utils.VtGate().start(cache_ttl='0', tablets=[
        shard_master, shard_replica, shard_rdonly1])
    utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1)

    # check the Map Reduce API works correctly, should use ExecuteShards,
    # as we're not sharded yet.
    # we have 3 values in the database, asking for 4 splits will get us
    # a single query.
    sql = 'select id, msg from resharding1'
    s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
    self.assertEqual(len(s), 1)
    self.assertEqual(s[0]['shard_part']['shards'][0], '0')
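    # (The split is expected to look roughly like
    # [{'query': ..., 'shard_part': {'shards': ['0']}}]: splits for an
    # unsharded keyspace address shards directly, while the sharded case
    # further down returns 'key_range_part' entries instead.)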

    # change the schema, backfill keyspace_id, and change schema again
    self._add_sharding_key_to_schema()
    self._backfill_keyspace_id(shard_master)
    self._mark_sharding_key_not_null()

    # now we can be a sharded keyspace (and propagate to SrvKeyspace)
    utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                     'custom_ksid_col', base_sharding.keyspace_id_type])
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)

    # run a health check on source replica so it responds to discovery
    utils.run_vtctl(['RunHealthCheck', shard_replica.tablet_alias])

    # create the split shards
    shard_0_master.init_tablet(
        'master',
        keyspace='test_keyspace',
        shard='-80',
        tablet_index=0)
    shard_0_replica.init_tablet(
        'replica',
        keyspace='test_keyspace',
        shard='-80',
        tablet_index=1)
    shard_0_rdonly1.init_tablet(
        'rdonly',
        keyspace='test_keyspace',
        shard='-80',
        tablet_index=2)
    shard_1_master.init_tablet(
        'master',
        keyspace='test_keyspace',
        shard='80-',
        tablet_index=0)
    shard_1_replica.init_tablet(
        'replica',
        keyspace='test_keyspace',
        shard='80-',
        tablet_index=1)
    shard_1_rdonly1.init_tablet(
        'rdonly',
        keyspace='test_keyspace',
        shard='80-',
        tablet_index=2)

    for t in [shard_0_master, shard_0_replica,
              shard_1_master, shard_1_replica]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)
    for t in [shard_0_rdonly1, shard_1_rdonly1]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)

    for t in [shard_0_master, shard_1_master]:
      t.wait_for_vttablet_state('SERVING')
    for t in [shard_0_replica, shard_0_rdonly1,
              shard_1_replica, shard_1_rdonly1]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    for t in [shard_0_replica, shard_1_replica]:
      utils.wait_for_tablet_type(t.tablet_alias, 'replica')
    for t in [shard_0_rdonly1, shard_1_rdonly1]:
      utils.wait_for_tablet_type(t.tablet_alias, 'rdonly')

    sharded_tablets = [shard_0_master, shard_0_replica, shard_0_rdonly1,
                       shard_1_master, shard_1_replica, shard_1_rdonly1]
    for t in sharded_tablets:
      t.wait_for_vttablet_state('SERVING')

    # must restart vtgate after tablets are up, or else wait until 1min refresh
    # we want cache_ttl at zero so we re-read the topology for every test query.
    utils.vtgate.kill()
    utils.VtGate().start(cache_ttl='0', tablets=[
        shard_master, shard_replica, shard_rdonly1,
        shard_0_master, shard_0_replica, shard_0_rdonly1,
        shard_1_master, shard_1_replica, shard_1_rdonly1])
    utils.vtgate.wait_for_endpoints('test_keyspace.0.master', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.0.replica', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.0.rdonly', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.-80.replica', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.80-.replica', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1)

    # check the Map Reduce API works correctly, should use ExecuteKeyRanges now,
    # as we are sharded (with just one shard).
    # again, we have 3 values in the database, asking for 4 splits will get us
    # a single query.
    sql = 'select id, msg from resharding1'
    s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
    self.assertEqual(len(s), 1)
    self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
    # There must be one empty KeyRange which represents the full keyspace.
    self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
    self.assertEqual(s[0]['key_range_part']['key_ranges'][0], {})
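    # (An empty KeyRange {} carries neither a start nor an end bound, which
    # is how an unbounded range spanning the whole keyspace is serialized.)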

    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -\n'
                             'Partitions(replica): -\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # we need to create the schema, and the worker will do data copying
    for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
      utils.run_vtctl(['CopySchemaShard',
                       '--exclude_tables', 'unrelated',
                       shard_rdonly1.tablet_alias,
                       keyspace_shard],
                      auto_log=True)
    utils.run_vtctl(['RunHealthCheck', shard_rdonly1.tablet_alias])

    # Run vtworker as daemon for the following SplitClone commands.
    worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
        ['--cell', 'test_nj', '--command_display_interval', '10ms'],
        auto_log=True)

    # Initial clone (online).
    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--offline=false',
         '--exclude_tables', 'unrelated',
         '--min_table_size_for_split', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/0'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        3, 0, 0)

    # Reset vtworker such that we can run the next command.
    workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
    utils.wait_procs([workerclient_proc])

    # Modify the destination shard. SplitClone will revert the changes.
    # Delete row 1 (provokes an insert).
    shard_0_master.mquery('vt_test_keyspace',
                          'delete from resharding1 where id=1', write=True)
    # Delete row 2 (provokes an insert).
    shard_1_master.mquery('vt_test_keyspace',
                          'delete from resharding1 where id=2', write=True)
    # Update row 3 (provokes an update).
    shard_1_master.mquery('vt_test_keyspace',
                          "update resharding1 set msg='msg-not-3' where id=3",
                          write=True)
    # Insert row 4 (provokes a delete).
    self._insert_value(shard_1_master, 'resharding1', 4, 'msg4',
                       0xD000000000000000)
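    # Hedged aside, not part of the original test: with a uint64 sharding
    # key, a row routes to the shard whose range covers its keyspace id, so
    # 0xD000000000000000 >= 0x8000000000000000 belongs on '80-'. A minimal
    # routing check for this two-way split:
    #
    #   def shard_for_keyspace_id(ksid):  # hypothetical helper
    #     return '-80' if ksid < 0x8000000000000000 else '80-'
    #
    #   assert shard_for_keyspace_id(0xD000000000000000) == '80-'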

    workerclient_proc = utils.run_vtworker_client_bg(
        ['SplitClone',
         '--exclude_tables', 'unrelated',
         '--min_table_size_for_split', '1',
         '--min_healthy_rdonly_tablets', '1',
         'test_keyspace/0'],
        worker_rpc_port)
    utils.wait_procs([workerclient_proc])
    self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
                                        2, 1, 1)
    self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1',
                                        0, 0, 0)
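    # (Assuming the counter arguments are inserts/updates/deletes, they line
    # up with the four modifications above: the two deleted rows come back as
    # 2 inserts, the changed row as 1 update, and the extra row 4 as 1
    # delete; the offline phase then has nothing left to reconcile.)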
    # Terminate worker daemon because it is no longer needed.
    utils.kill_sub_process(worker_proc, soft=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check the binlog players are running
    logging.debug('Waiting for binlog players to start on new masters...')
    self.check_destination_master(shard_0_master, ['test_keyspace/0'])
    self.check_destination_master(shard_1_master, ['test_keyspace/0'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(shard_replica, horizontal=True)

    # testing filtered replication: insert a bunch of data on shard 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 5)
    if v != 100:
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 20)
    logging.debug('Checking no data was sent the wrong way')
    self._check_lots_not_present(1000)
    self.check_binlog_player_vars(shard_0_master, ['test_keyspace/0'],
                                  seconds_behind_master_max=30)
    self.check_binlog_player_vars(shard_1_master, ['test_keyspace/0'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(shard_replica, horizontal=True,
                                  min_statements=1000, min_transactions=1000)

    # use vtworker to compare the data
    logging.debug('Running vtworker SplitDiff for -80')
    for t in [shard_0_rdonly1, shard_1_rdonly1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--min_healthy_rdonly_tablets', '1',
                        'test_keyspace/-80'],
                       auto_log=True)

    logging.debug('Running vtworker SplitDiff for 80-')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--min_healthy_rdonly_tablets', '1',
                        'test_keyspace/80-'],
                       auto_log=True)

    utils.pause('Good time to test vtworker for diffs')

    # get status for the destination master tablet, make sure we have it all
    self.check_running_binlog_player(shard_0_master, 2000, 2000)
    self.check_running_binlog_player(shard_1_master, 6000, 2000)
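    # (The two figures passed to check_running_binlog_player are presumably
    # minimum statement and transaction totals applied by the player; the
    # exact semantics live in the base_sharding mixin, not shown here.)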

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    expect_fail=True)

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # make sure rdonly tablets are back to serving before hitting vtgate.
    for t in [shard_0_rdonly1, shard_1_rdonly1]:
      t.wait_for_vttablet_state('SERVING')
    utils.vtgate.wait_for_endpoints('test_keyspace.-80.rdonly', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.80-.rdonly', 1)

    # check the Map Reduce API works correctly, should use ExecuteKeyRanges
    # on both destination shards now.
    # we ask for 2 splits to only have one per shard
    sql = 'select id, msg from resharding1'
    s = utils.vtgate.split_query(sql, 'test_keyspace', 2)
    self.assertEqual(len(s), 2)
    self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
    self.assertEqual(s[1]['key_range_part']['keyspace'], 'test_keyspace')
    self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
    self.assertEqual(len(s[1]['key_range_part']['key_ranges']), 1)

    # then serve replica from the split shards
    source_tablet = shard_replica
    destination_tablets = [shard_0_replica, shard_1_replica]

    utils.run_vtctl(
        ['MigrateServedTypes', 'test_keyspace/0', 'replica'], auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # move replica back and forth
    utils.run_vtctl(
        ['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
        auto_log=True)
    # After a backwards migration, queryservice should be enabled on
    # source and disabled on destinations
    utils.check_tablet_query_service(self, source_tablet, True, False)
    utils.check_tablet_query_services(self, destination_tablets, False, True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    # After a forwards migration, queryservice should be disabled on
    # source and enabled on destinations
    utils.check_tablet_query_service(self, source_tablet, False, True)
    utils.check_tablet_query_services(self, destination_tablets, True, False)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # then serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=base_sharding.keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_0_master)
    self.check_no_binlog_player(shard_1_master)

    # make sure we can't delete a shard with tablets
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

    # remove the original tablets in the original shard
    tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1])
    for t in [shard_replica, shard_rdonly1]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    utils.run_vtctl(['DeleteTablet', '-allow_master',
                     shard_master.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly1,
                         shard_1_master, shard_1_replica, shard_1_rdonly1])
Example #41
0
    def test_resharding(self):
        # create the keyspace with just one shard
        utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', 'test_keyspace',
            'keyspace_id', keyspace_id_type
        ])

        shard_master.init_tablet('master', 'test_keyspace', '0')
        shard_replica.init_tablet('replica', 'test_keyspace', '0')
        shard_rdonly1.init_tablet('rdonly', 'test_keyspace', '0')
        shard_rdonly2.init_tablet('rdonly', 'test_keyspace', '0')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # create databases so vttablet can start behaving normally
        for t in [shard_master, shard_replica, shard_rdonly1, shard_rdonly2]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None)

        # wait for the tablets
        shard_master.wait_for_vttablet_state('SERVING')
        shard_replica.wait_for_vttablet_state('SERVING')
        shard_rdonly1.wait_for_vttablet_state('SERVING')
        shard_rdonly2.wait_for_vttablet_state('SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/0',
            shard_master.tablet_alias
        ],
                        auto_log=True)

        # create the tables and add startup values
        self._create_schema()
        self._insert_startup_values()

        # change the schema, backfill keyspace_id, and change schema again
        self._add_sharding_key_to_schema()
        self._backfill_keyspace_id(shard_master)
        self._mark_sharding_key_not_null()

        # create the split shards
        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_rdonly1.init_tablet('rdonly', 'test_keyspace', '-80')
        shard_0_rdonly2.init_tablet('rdonly', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')
        shard_1_rdonly2.init_tablet('rdonly', 'test_keyspace', '80-')

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        for t in [
                shard_0_master, shard_0_replica, shard_0_rdonly1,
                shard_0_rdonly2, shard_1_master, shard_1_replica,
                shard_1_rdonly1, shard_1_rdonly2
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_0_master, shard_0_replica, shard_0_rdonly1,
                shard_0_rdonly2, shard_1_master, shard_1_replica,
                shard_1_rdonly1, shard_1_rdonly2
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace',
            'Partitions(master): -\n' + 'Partitions(rdonly): -\n' +
            'Partitions(replica): -\n' + 'TabletTypes: master,rdonly,replica',
            keyspace_id_type=keyspace_id_type)

        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)

        utils.run_vtworker([
            '--cell', 'test_nj', '--command_display_interval', '10ms',
            'SplitClone', '--exclude_tables', 'unrelated',
            '--strategy=-populate_blp_checkpoint', '--source_reader_count',
            '10', '--min_table_size_for_split', '1', 'test_keyspace/0'
        ],
                           auto_log=True)
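        # (The '-populate_blp_checkpoint' strategy presumably asks the worker
        # to seed binlog-player checkpoints on the destination masters, so
        # filtered replication can start from the clone position afterwards.)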
        utils.run_vtctl(
            ['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_rdonly2.tablet_alias, 'rdonly'],
            auto_log=True)

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                        auto_log=True)

        # check the binlog players are running
        logging.debug("Waiting for binlog players to start on new masters...")
        shard_0_master.wait_for_binlog_player_count(1)
        shard_1_master.wait_for_binlog_player_count(1)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000)
        logging.debug("Checking 80 percent of data is sent quickly")
        self._check_lots_timeout(1000, 80, 5)
        logging.debug("Checking all data goes through eventually")
        self._check_lots_timeout(1000, 100, 20)
        logging.debug("Checking no data was sent the wrong way")
        self._check_lots_not_present(1000)

        # use the vtworker checker to compare the data
        logging.debug("Running vtworker SplitDiff for -80")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/-80'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_rdonly2.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_0_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_0_rdonly2.tablet_alias, 'rdonly'],
            auto_log=True)

        logging.debug("Running vtworker SplitDiff for 80-")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/80-'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_rdonly2.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly2.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause("Good time to test vtworker for diffs")

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        expect_fail=True)

        # now serve rdonly from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace',
            'Partitions(master): -\n' + 'Partitions(rdonly): -80 80-\n' +
            'Partitions(replica): -\n' + 'TabletTypes: master,rdonly,replica',
            keyspace_id_type=keyspace_id_type)

        # then serve replica from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -\n' +
                                 'Partitions(rdonly): -80 80-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
            auto_log=True)
        utils.check_srv_keyspace(
            'test_nj',
            'test_keyspace',
            'Partitions(master): -\n' + 'Partitions(rdonly): -80 80-\n' +
            'Partitions(replica): -\n' + 'TabletTypes: master,rdonly,replica',
            keyspace_id_type=keyspace_id_type)
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -\n' +
                                 'Partitions(rdonly): -80 80-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # then serve master from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # check the binlog players are gone now
        shard_0_master.wait_for_binlog_player_count(0)
        shard_1_master.wait_for_binlog_player_count(0)

        # make sure we can't delete a shard with tablets
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

        # scrap the original tablets in the original shard
        for t in [shard_master, shard_replica, shard_rdonly1, shard_rdonly2]:
            utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
        tablet.kill_tablets(
            [shard_master, shard_replica, shard_rdonly1, shard_rdonly2])
        for t in [shard_master, shard_replica, shard_rdonly1, shard_rdonly2]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

        # kill everything else
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_rdonly1, shard_0_rdonly2,
            shard_1_master, shard_1_replica, shard_1_rdonly1, shard_1_rdonly2
        ])
Example #42
0
    def test_resharding(self):
        utils.run_vtctl(
            [
                "CreateKeyspace",
                "--sharding_column_name",
                "bad_column",
                "--sharding_column_type",
                "bytes",
                "--split_shard_count",
                "2",
                "test_keyspace",
            ]
        )
        utils.run_vtctl(["SetKeyspaceShardingInfo", "test_keyspace", "keyspace_id", "uint64"], expect_fail=True)
        utils.run_vtctl(
            [
                "SetKeyspaceShardingInfo",
                "-force",
                "-split_shard_count",
                "4",
                "test_keyspace",
                "keyspace_id",
                keyspace_id_type,
            ]
        )

        shard_0_master.init_tablet("master", "test_keyspace", "-80")
        shard_0_replica.init_tablet("replica", "test_keyspace", "-80")
        shard_0_ny_rdonly.init_tablet("rdonly", "test_keyspace", "-80")
        shard_1_master.init_tablet("master", "test_keyspace", "80-")
        shard_1_slave1.init_tablet("replica", "test_keyspace", "80-")
        shard_1_slave2.init_tablet("spare", "test_keyspace", "80-")
        shard_1_ny_rdonly.init_tablet("rdonly", "test_keyspace", "80-")
        shard_1_rdonly1.init_tablet("rdonly", "test_keyspace", "80-")

        utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)

        ks = utils.run_vtctl_json(["GetSrvKeyspace", "test_nj", "test_keyspace"])
        self.assertEqual(ks["split_shard_count"], 4)

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

        # create databases so vttablet can start behaving normally
        for t in [
            shard_0_master,
            shard_0_replica,
            shard_0_ny_rdonly,
            shard_1_master,
            shard_1_slave1,
            shard_1_slave2,
            shard_1_ny_rdonly,
            shard_1_rdonly1,
        ]:
            t.create_db("vt_test_keyspace")
            t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args)

        # wait for the tablets
        shard_0_master.wait_for_vttablet_state("SERVING")
        shard_0_replica.wait_for_vttablet_state("SERVING")
        shard_0_ny_rdonly.wait_for_vttablet_state("SERVING")
        shard_1_master.wait_for_vttablet_state("SERVING")
        shard_1_slave1.wait_for_vttablet_state("SERVING")
        shard_1_slave2.wait_for_vttablet_state("NOT_SERVING")  # spare
        shard_1_ny_rdonly.wait_for_vttablet_state("SERVING")
        shard_1_rdonly1.wait_for_vttablet_state("SERVING")

        # reparent to make the tablets work
        utils.run_vtctl(["InitShardMaster", "test_keyspace/-80", shard_0_master.tablet_alias], auto_log=True)
        utils.run_vtctl(["InitShardMaster", "test_keyspace/80-", shard_1_master.tablet_alias], auto_log=True)

        # create the tables
        self._create_schema()
        self._insert_startup_values()
        self._test_keyrange_constraints()

        # create the split shards
        shard_2_master.init_tablet("master", "test_keyspace", "80-c0")
        shard_2_replica1.init_tablet("spare", "test_keyspace", "80-c0")
        shard_2_replica2.init_tablet("spare", "test_keyspace", "80-c0")
        shard_3_master.init_tablet("master", "test_keyspace", "c0-")
        shard_3_replica.init_tablet("spare", "test_keyspace", "c0-")
        shard_3_rdonly1.init_tablet("rdonly", "test_keyspace", "c0-")

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_3_master.start_vttablet(wait_for_state=None, target_tablet_type="replica")
        for t in [shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_replica, shard_3_rdonly1]:
            t.start_vttablet(wait_for_state=None)
        for t in [shard_2_master, shard_2_replica1, shard_2_replica2, shard_3_master, shard_3_replica, shard_3_rdonly1]:
            t.wait_for_vttablet_state("NOT_SERVING")

        utils.run_vtctl(["InitShardMaster", "test_keyspace/80-c0", shard_2_master.tablet_alias], auto_log=True)
        utils.run_vtctl(["InitShardMaster", "test_keyspace/c0-", shard_3_master.tablet_alias], auto_log=True)

        utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n" + "Partitions(rdonly): -80 80-\n" + "Partitions(replica): -80 80-\n",
            keyspace_id_type=keyspace_id_type,
        )

        # the worker will do everything. We test with source_reader_count=10
        # (down from the default of 20) as the connection pool is not big
        # enough for 20. min_table_size_for_split is set to 1 to force a
        # split even on the small table we have.
        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ("test_keyspace/80-c0", "test_keyspace/c0-"):
            utils.run_vtctl(
                ["CopySchemaShard", "--exclude_tables", "unrelated", shard_1_rdonly1.tablet_alias, keyspace_shard],
                auto_log=True,
            )

        utils.run_vtworker(
            [
                "--cell",
                "test_nj",
                "--command_display_interval",
                "10ms",
                "SplitClone",
                "--exclude_tables",
                "unrelated",
                "--strategy=-populate_blp_checkpoint",
                "--source_reader_count",
                "10",
                "--min_table_size_for_split",
                "1",
                "test_keyspace/80-",
            ],
            auto_log=True,
        )
        utils.run_vtctl(["ChangeSlaveType", shard_1_rdonly1.tablet_alias, "rdonly"], auto_log=True)

        # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(["ValidateSchemaKeyspace", "--exclude_tables=unrelated", "test_keyspace"], auto_log=True)

        # check the binlog players are running and exporting vars
        shard_2_master.wait_for_binlog_player_count(1)
        shard_3_master.wait_for_binlog_player_count(1)
        self._check_binlog_player_vars(shard_2_master)
        self._check_binlog_player_vars(shard_3_master)

        # check that binlog server exported the stats vars
        self._check_binlog_server_vars(shard_1_slave1)

        self._check_stream_health_equals_binlog_player_vars(shard_2_master)
        self._check_stream_health_equals_binlog_player_vars(shard_3_master)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000)
        logging.debug("Checking 80 percent of data is sent quickly")
        self._check_lots_timeout(1000, 80, 5)
        logging.debug("Checking all data goes through eventually")
        self._check_lots_timeout(1000, 100, 20)
        logging.debug("Checking no data was sent the wrong way")
        self._check_lots_not_present(1000)
        self._check_binlog_player_vars(shard_2_master, seconds_behind_master_max=30)
        self._check_binlog_player_vars(shard_3_master, seconds_behind_master_max=30)

        # use vtworker to compare the data
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ["-cell", "test_nj", "SplitDiff", "--exclude_tables", "unrelated", "test_keyspace/c0-"], auto_log=True
        )
        utils.run_vtctl(["ChangeSlaveType", shard_1_rdonly1.tablet_alias, "rdonly"], auto_log=True)
        utils.run_vtctl(["ChangeSlaveType", shard_3_rdonly1.tablet_alias, "rdonly"], auto_log=True)

        utils.pause("Good time to test vtworker for diffs")

        # get status for a destination master tablet, make sure we have it all
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn("Binlog player state: Running", shard_2_master_status)
        self.assertIn(
            "<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>", shard_2_master_status
        )
        self.assertIn("</html>", shard_2_master_status)

        # start a thread to insert data into shard_1 in the background
        # with current time, and monitor the delay
        insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000, 0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001, 0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
        monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")
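        # Hedged sketch, not the original classes: InsertThread presumably
        # keeps re-writing a timestamped row for its object_name at the given
        # keyspace id, e.g.
        #
        #   while not self.done:
        #     tablet_obj.mquery(db, 'insert ... current time in ms ...',
        #                       write=True)
        #
        # while MonitorLagThread reads that timestamp on a destination
        # replica and accumulates the max_lag / lag_sum / sample_count
        # reported in the DELAY lines further down.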

        # tests a failover switching serving to a different replica
        utils.run_vtctl(["ChangeSlaveType", shard_1_slave2.tablet_alias, "replica"])
        utils.run_vtctl(["ChangeSlaveType", shard_1_slave1.tablet_alias, "spare"])
        shard_1_slave2.wait_for_vttablet_state("SERVING")
        shard_1_slave1.wait_for_vttablet_state("NOT_SERVING")

        # test data goes through again
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000, base=1000)
        logging.debug("Checking 80 percent of data was sent quickly")
        self._check_lots_timeout(1000, 80, 5, base=1000)

        # check we can't migrate the master just yet
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "master"], expect_fail=True)

        # check query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even the health check that is enabled on
        # master 3 should not interfere (we run it to be sure).
        utils.run_vtctl(["RunHealthCheck", shard_3_master.tablet_alias, "replica"], auto_log=True)
        for master in [shard_2_master, shard_3_master]:
            utils.check_tablet_query_service(self, master, False, False)
            stream_health = utils.run_vtctl_json(["VtTabletStreamHealth", "-count", "1", master.tablet_alias])
            logging.debug("Got health: %s", str(stream_health))
            self.assertIn("realtime_stats", stream_health)
            self.assertNotIn("serving", stream_health)

        # check the destination master 3 is healthy, even though its query
        # service is not running (if not healthy this would exception out)
        shard_3_master.get_healthz()

        # now serve rdonly from the split shards, in test_nj only
        utils.run_vtctl(["MigrateServedTypes", "--cells=test_nj", "test_keyspace/80-", "rdonly"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n" "Partitions(rdonly): -80 80-c0 c0-\n" "Partitions(replica): -80 80-\n",
            keyspace_id_type=keyspace_id_type,
        )
        utils.check_srv_keyspace(
            "test_ny",
            "test_keyspace",
            "Partitions(master): -80 80-\n" "Partitions(rdonly): -80 80-\n" "Partitions(replica): -80 80-\n",
            keyspace_id_type=keyspace_id_type,
        )
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # now serve rdonly from the split shards, everywhere
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "rdonly"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n" "Partitions(rdonly): -80 80-c0 c0-\n" "Partitions(replica): -80 80-\n",
            keyspace_id_type=keyspace_id_type,
        )
        utils.check_srv_keyspace(
            "test_ny",
            "test_keyspace",
            "Partitions(master): -80 80-\n" "Partitions(rdonly): -80 80-c0 c0-\n" "Partitions(replica): -80 80-\n",
            keyspace_id_type=keyspace_id_type,
        )
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # then serve replica from the split shards
        source_tablet = shard_1_slave2
        destination_shards = ["test_keyspace/80-c0", "test_keyspace/c0-"]

        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "replica"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n"
            "Partitions(rdonly): -80 80-c0 c0-\n"
            "Partitions(replica): -80 80-c0 c0-\n",
            keyspace_id_type=keyspace_id_type,
        )
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)

        # move replica back and forth
        utils.run_vtctl(["MigrateServedTypes", "-reverse", "test_keyspace/80-", "replica"], auto_log=True)
        # After a backwards migration, queryservice should be enabled on source and disabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, True, False)
        # Destination tablets could have query service disabled for reasons
        # other than the migration, so check the shard record instead of the
        # tablets directly
        utils.check_shard_query_services(self, destination_shards, tablet.Tablet.tablet_type_value["REPLICA"], False)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n" "Partitions(rdonly): -80 80-c0 c0-\n" "Partitions(replica): -80 80-\n",
            keyspace_id_type=keyspace_id_type,
        )

        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "replica"], auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)
        # Destination tablets could have query service disabled for reasons
        # other than the migration, so check the shard record instead of
        # the tablets directly
        utils.check_shard_query_services(self, destination_shards, tablet.Tablet.tablet_type_value["REPLICA"], True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-\n"
            "Partitions(rdonly): -80 80-c0 c0-\n"
            "Partitions(replica): -80 80-c0 c0-\n",
            keyspace_id_type=keyspace_id_type,
        )

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl(["PlannedReparentShard", "test_keyspace/80-c0", shard_2_replica1.tablet_alias])
        logging.debug("Inserting lots of data on source shard after reparenting")
        self._insert_lots(3000, base=2000)
        logging.debug("Checking 80 percent of data was sent fairly quickly")
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use vtworker to compare the data again
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ["-cell", "test_nj", "SplitDiff", "--exclude_tables", "unrelated", "test_keyspace/c0-"], auto_log=True
        )
        utils.run_vtctl(["ChangeSlaveType", shard_1_rdonly1.tablet_alias, "rdonly"], auto_log=True)
        utils.run_vtctl(["ChangeSlaveType", shard_3_rdonly1.tablet_alias, "rdonly"], auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug(
            "DELAY 1: %s max_lag=%d avg_lag=%d",
            monitor_thread_1.object_name,
            monitor_thread_1.max_lag,
            monitor_thread_1.lag_sum / monitor_thread_1.sample_count,
        )
        logging.debug(
            "DELAY 2: %s max_lag=%d avg_lag=%d",
            monitor_thread_2.object_name,
            monitor_thread_2.max_lag,
            monitor_thread_2.lag_sum / monitor_thread_2.sample_count,
        )

        # mock with the SourceShard records to test 'vtctl SourceShardDelete'
        # and 'vtctl SourceShardAdd'
        utils.run_vtctl(["SourceShardDelete", "test_keyspace/c0-", "0"], auto_log=True)
        utils.run_vtctl(
            ["SourceShardAdd", "--key_range=80-", "test_keyspace/c0-", "0", "test_keyspace/80-"], auto_log=True
        )

        # then serve master from the split shards, make sure the source master's
        # query service is now turned off
        utils.run_vtctl(["MigrateServedTypes", "test_keyspace/80-", "master"], auto_log=True)
        utils.check_srv_keyspace(
            "test_nj",
            "test_keyspace",
            "Partitions(master): -80 80-c0 c0-\n"
            "Partitions(rdonly): -80 80-c0 c0-\n"
            "Partitions(replica): -80 80-c0 c0-\n",
            keyspace_id_type=keyspace_id_type,
        )
        utils.check_tablet_query_service(self, shard_1_master, False, True)

        # check the binlog players are gone now
        shard_2_master.wait_for_binlog_player_count(0)
        shard_3_master.wait_for_binlog_player_count(0)

        # get status for a destination master tablet, make sure it's good
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn("No binlog player is running", shard_2_master_status)
        self.assertIn("</html>", shard_2_master_status)

        # scrap the original tablets in the original shard
        for t in [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1]:
            utils.run_vtctl(["ScrapTablet", t.tablet_alias], auto_log=True)
        tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1])
        for t in [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly, shard_1_rdonly1]:
            utils.run_vtctl(["DeleteTablet", t.tablet_alias], auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(["RebuildKeyspaceGraph", "test_keyspace"], auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(["RemoveShardCell", "test_keyspace/-80", "test_nj"], auto_log=True, expect_fail=True)
        utils.run_vtctl(["RemoveShardCell", "test_keyspace/80-", "test_nj"], auto_log=True)
        utils.run_vtctl(["RemoveShardCell", "test_keyspace/80-", "test_ny"], auto_log=True)
        shard = utils.run_vtctl_json(["GetShard", "test_keyspace/80-"])
        self.assertNotIn("cells", shard)

        # delete the original shard
        utils.run_vtctl(["DeleteShard", "test_keyspace/80-"], auto_log=True)

        # kill everything
        tablet.kill_tablets(
            [
                shard_0_master,
                shard_0_replica,
                shard_0_ny_rdonly,
                shard_2_master,
                shard_2_replica1,
                shard_2_replica2,
                shard_3_master,
                shard_3_replica,
                shard_3_rdonly1,
            ]
        )
Example #43
0
  def test_resharding(self):
    # create the keyspace with just one shard
    shard_master.start_vttablet(
        wait_for_state=None, target_tablet_type='replica',
        init_keyspace='test_keyspace', init_shard='0')
    shard_replica.start_vttablet(
        wait_for_state=None, target_tablet_type='replica',
        init_keyspace='test_keyspace', init_shard='0')
    shard_rdonly1.start_vttablet(
        wait_for_state=None, target_tablet_type='rdonly',
        init_keyspace='test_keyspace', init_shard='0')

    for t in [shard_master, shard_replica, shard_rdonly1]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     shard_master.tablet_alias], auto_log=True)

    for t in [shard_master, shard_replica, shard_rdonly1]:
      t.wait_for_vttablet_state('SERVING')

    # create the tables and add startup values
    self._create_schema()
    self._insert_startup_values()

    # reload schema on all tablets so we can query them
    for t in [shard_master, shard_replica, shard_rdonly1]:
      utils.run_vtctl(['ReloadSchema', t.tablet_alias], auto_log=True)

    # must start vtgate after tablets are up, or else wait until 1min refresh
    # we want cache_ttl at zero so we re-read the topology for every test query.
    utils.VtGate().start(cache_ttl='0')

    # check the Map Reduce API works correctly, should use ExecuteShards,
    # as we're not sharded yet.
    # we have 3 values in the database, asking for 4 splits will get us
    # a single query.
    sql = 'select id, msg from resharding1'
    s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
    self.assertEqual(len(s), 1)
    self.assertEqual(s[0]['shard_part']['shards'][0], '0')

    # change the schema, backfill keyspace_id, and change schema again
    self._add_sharding_key_to_schema()
    self._backfill_keyspace_id(shard_master)
    self._mark_sharding_key_not_null()

    # now we can be a sharded keyspace (and propagate to SrvKeyspace)
    utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                     'keyspace_id', keyspace_id_type])
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)

    # run a health check on source replica so it responds to discovery
    utils.run_vtctl(['RunHealthCheck', shard_replica.tablet_alias, 'replica'])

    # create the split shards
    shard_0_master.start_vttablet(
        wait_for_state=None, target_tablet_type='replica',
        init_keyspace='test_keyspace', init_shard='-80')
    shard_0_replica.start_vttablet(
        wait_for_state=None, target_tablet_type='replica',
        init_keyspace='test_keyspace', init_shard='-80')
    shard_0_rdonly1.start_vttablet(
        wait_for_state=None, target_tablet_type='rdonly',
        init_keyspace='test_keyspace', init_shard='-80')
    shard_1_master.start_vttablet(
        wait_for_state=None, target_tablet_type='replica',
        init_keyspace='test_keyspace', init_shard='80-')
    shard_1_replica.start_vttablet(
        wait_for_state=None, target_tablet_type='replica',
        init_keyspace='test_keyspace', init_shard='80-')
    shard_1_rdonly1.start_vttablet(
        wait_for_state=None, target_tablet_type='rdonly',
        init_keyspace='test_keyspace', init_shard='80-')

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    sharded_tablets = [shard_0_master, shard_0_replica, shard_0_rdonly1,
                       shard_1_master, shard_1_replica, shard_1_rdonly1]
    for t in sharded_tablets:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    for t in sharded_tablets:
      t.wait_for_vttablet_state('SERVING')

    # must restart vtgate after tablets are up, or else wait until 1min refresh
    # we want cache_ttl at zero so we re-read the topology for every test query.
    utils.vtgate.kill()
    utils.VtGate().start(cache_ttl='0')

    # check the Map Reduce API works correctly, should use ExecuteKeyRanges now,
    # as we are sharded (with just one shard).
    # again, we have 3 values in the database, asking for 4 splits will get us
    # a single query.
    sql = 'select id, msg from resharding1'
    s = utils.vtgate.split_query(sql, 'test_keyspace', 4)
    self.assertEqual(len(s), 1)
    self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
    # There must be one empty KeyRange which represents the full keyspace.
    self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
    self.assertEqual(s[0]['key_range_part']['key_ranges'][0], {})

    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -\n'
                             'Partitions(replica): -\n',
                             keyspace_id_type=keyspace_id_type)

    # we need to create the schema, and the worker will do data copying
    for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
      utils.run_vtctl(['CopySchemaShard',
                       '--exclude_tables', 'unrelated',
                       shard_rdonly1.tablet_alias,
                       keyspace_shard],
                      auto_log=True)
    utils.run_vtctl(['RunHealthCheck', shard_rdonly1.tablet_alias, 'rdonly'])

    utils.run_vtworker(['--cell', 'test_nj',
                        '--command_display_interval', '10ms',
                        'SplitClone',
                        '--exclude_tables', 'unrelated',
                        '--source_reader_count', '10',
                        '--min_table_size_for_split', '1',
                        'test_keyspace/0'],
                       auto_log=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check the binlog players are running
    logging.debug('Waiting for binlog players to start on new masters...')
    shard_0_master.wait_for_binlog_player_count(1)
    shard_1_master.wait_for_binlog_player_count(1)

    # testing filtered replication: insert a bunch of data on shard 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 5)
    if v != 100:
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 20)
    logging.debug('Checking no data was sent the wrong way')
    self._check_lots_not_present(1000)
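    # a minimal sketch of the polling pattern _check_lots_timeout is assumed
    # to implement, with a hypothetical _check_lots helper that returns the
    # percentage of rows found on the destination shards:
    #
    #   def _check_lots_timeout(self, count, threshold, timeout, base=0):
    #     while True:
    #       percent = self._check_lots(count, base=base)
    #       if percent >= threshold or timeout <= 0:
    #         return percent
    #       timeout -= 1
    #       time.sleep(1)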

    # use vtworker to compare the data
    logging.debug('Running vtworker SplitDiff for -80')
    for t in [shard_0_rdonly1, shard_1_rdonly1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/-80'],
                       auto_log=True)

    logging.debug('Running vtworker SplitDiff for 80-')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/80-'],
                       auto_log=True)

    utils.pause('Good time to test vtworker for diffs')

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    expect_fail=True)
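    # (the migration has to proceed rdonly -> replica -> master: the master
    # migration is the step that cuts over writes and stops filtered
    # replication, so vtctl refuses it while other served types still point
    # at the source shard.)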

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -\n',
                             keyspace_id_type=keyspace_id_type)

    # make sure rdonly tablets are back to serving before hitting vtgate.
    for t in [shard_0_rdonly1, shard_1_rdonly1]:
      t.wait_for_vttablet_state('SERVING')

    # check the Map Reduce API works correctly; it should use
    # ExecuteKeyRanges on both destination shards now.
    # we ask for 2 splits so each shard gets exactly one.
    sql = 'select id, msg from resharding1'
    s = utils.vtgate.split_query(sql, 'test_keyspace', 2)
    self.assertEqual(len(s), 2)
    self.assertEqual(s[0]['key_range_part']['keyspace'], 'test_keyspace')
    self.assertEqual(s[1]['key_range_part']['keyspace'], 'test_keyspace')
    self.assertEqual(len(s[0]['key_range_part']['key_ranges']), 1)
    self.assertEqual(len(s[1]['key_range_part']['key_ranges']), 1)
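    # (presumably one split lands on each destination shard, covering -80
    # and 80- respectively; the test only asserts the counts and the
    # keyspace, not the ordering.)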

    # then serve replica from the split shards
    source_tablet = shard_replica
    destination_tablets = [shard_0_replica, shard_1_replica]

    utils.run_vtctl(
        ['MigrateServedTypes', 'test_keyspace/0', 'replica'], auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)

    # move replica back and forth
    utils.run_vtctl(
        ['MigrateServedTypes', '-reverse', 'test_keyspace/0', 'replica'],
        auto_log=True)
    # After a backwards migration, queryservice should be enabled on
    # source and disabled on destinations
    utils.check_tablet_query_service(self, source_tablet, True, False)
    utils.check_tablet_query_services(self, destination_tablets, False, True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -\n',
                             keyspace_id_type=keyspace_id_type)

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'replica'],
                    auto_log=True)
    # After a forwards migration, queryservice should be disabled on
    # source and enabled on destinations
    utils.check_tablet_query_service(self, source_tablet, False, True)
    utils.check_tablet_query_services(self, destination_tablets, True, False)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)

    # then serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)

    # check the binlog players are gone now
    shard_0_master.wait_for_binlog_player_count(0)
    shard_1_master.wait_for_binlog_player_count(0)

    # make sure we can't delete a shard with tablets
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

    # remove the original tablets in the original shard
    tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1])
    for t in [shard_replica, shard_rdonly1]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    utils.run_vtctl(['DeleteTablet', '-allow_master',
                     shard_master.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

    # kill everything else
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly1,
                         shard_1_master, shard_1_replica, shard_1_rdonly1])
Example #44
0
  def test_resharding(self):
    # we're going to reparent and swap these two
    global shard_2_master, shard_2_replica1

    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'bad_column',
                     '--sharding_column_type', 'bytes',
                     '--split_shard_count', '2',
                     'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                     'keyspace_id', 'uint64'], expect_fail=True)
    utils.run_vtctl(['SetKeyspaceShardingInfo',
                     '-force', '-split_shard_count', '4',
                     'test_keyspace', 'keyspace_id', keyspace_id_type])
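    # (the plain SetKeyspaceShardingInfo above is expected to fail because
    # the keyspace was created with sharding info already set; -force is
    # needed to overwrite bad_column/bytes with the real column and type.)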

    shard_0_master.init_tablet('master', 'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
    shard_1_master.init_tablet('master', 'test_keyspace', '80-')
    shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
    shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
    shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['split_shard_count'], 4)
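    # (split_shard_count records the intended number of final shards; the
    # -force SetKeyspaceShardingInfo above bumped it from the 2 given at
    # keyspace creation to 4.)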

    # we set full_mycnf_args to True as a test in the KIT_BYTES case
    full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

    # create databases so vttablet can start behaving normally
    for t in [shard_0_master, shard_0_replica, shard_0_ny_rdonly,
              shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
              shard_1_rdonly1]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args)

    # wait for the tablets
    shard_0_master.wait_for_vttablet_state('SERVING')
    shard_0_replica.wait_for_vttablet_state('SERVING')
    shard_0_ny_rdonly.wait_for_vttablet_state('SERVING')
    shard_1_master.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('SERVING')
    shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
    shard_1_ny_rdonly.wait_for_vttablet_state('SERVING')
    shard_1_rdonly1.wait_for_vttablet_state('SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    # create the tables
    self._create_schema()
    self._insert_startup_values()
    self._test_keyrange_constraints()

    # run a health check on source replicas so they respond to discovery
    # (for binlog players) and on the source rdonlys (for workers)
    for t in [shard_0_replica, shard_1_slave1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'replica'])
    for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias, 'rdonly'])

    # create the split shards
    shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
    shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-c0')
    shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-c0')
    shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
    shard_3_replica.init_tablet('spare', 'test_keyspace', 'c0-')
    shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')
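    # (shard names are hex keyspace-id prefixes: '80-c0' covers ids in
    # [0x80, 0xc0) and 'c0-' covers [0xc0, max], splitting the source
    # shard '80-' in half.)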

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    shard_3_master.start_vttablet(wait_for_state=None,
                                  target_tablet_type='replica')
    for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
              shard_3_replica, shard_3_rdonly1]:
      t.start_vttablet(wait_for_state=None)
    for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
              shard_3_master, shard_3_replica, shard_3_rdonly1]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-c0',
                     shard_2_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/c0-',
                     shard_3_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=keyspace_id_type)

    # the worker will do everything. We test with source_reader_count=10
    # (down from the default of 20) as the connection pool is not big
    # enough for 20. min_table_size_for_split is set to 1 so as to force a
    # split even on the small table we have.
    # we need to create the schema, and the worker will do data copying
    for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
      utils.run_vtctl(['CopySchemaShard', '--exclude_tables', 'unrelated',
                       shard_1_rdonly1.tablet_alias, keyspace_shard],
                      auto_log=True)

    utils.run_vtworker(['--cell', 'test_nj',
                        '--command_display_interval', '10ms',
                        'SplitClone',
                        '--exclude_tables', 'unrelated',
                        '--source_reader_count', '10',
                        '--min_table_size_for_split', '1',
                        'test_keyspace/80-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias,
                     'rdonly'], auto_log=True)

    # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', '--exclude_tables=unrelated',
                     'test_keyspace'], auto_log=True)

    # check the binlog players are running and exporting vars
    shard_2_master.wait_for_binlog_player_count(1)
    shard_3_master.wait_for_binlog_player_count(1)
    self._check_binlog_player_vars(shard_2_master)
    self._check_binlog_player_vars(shard_3_master)

    # check that binlog server exported the stats vars
    self._check_binlog_server_vars(shard_1_slave1)

    self._check_stream_health_equals_binlog_player_vars(shard_2_master)
    self._check_stream_health_equals_binlog_player_vars(shard_3_master)

    # testing filtered replication: insert a bunch of data on shard 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 5)
    if v != 100:
      # small optimization: only do this check if we don't have all the data
      # already anyway.
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 20)
    logging.debug('Checking no data was sent the wrong way')
    self._check_lots_not_present(1000)
    self._check_binlog_player_vars(shard_2_master,
                                   seconds_behind_master_max=30)
    self._check_binlog_player_vars(shard_3_master,
                                   seconds_behind_master_max=30)

    # use vtworker to compare the data (after health-checking the destination
    # rdonly tablets so discovery works)
    utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias, 'rdonly'])
    logging.debug('Running vtworker SplitDiff')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', '--exclude_tables',
                        'unrelated', 'test_keyspace/c0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    utils.pause('Good time to test vtworker for diffs')

    # get status for a destination master tablet, make sure we have it all
    shard_2_master_status = shard_2_master.get_status()
    self.assertIn('Binlog player state: Running', shard_2_master_status)
    self.assertIn(
        '<td><b>All</b>: 6000<br><b>Query</b>: 4000<br>'
        '<b>Transaction</b>: 2000<br></td>', shard_2_master_status)
    self.assertIn('</html>', shard_2_master_status)

    # start a thread to insert data into shard_1 in the background
    # with current time, and monitor the delay
    insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 10000,
                                   0x9000000000000000)
    insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 10001,
                                   0xD000000000000000)
    monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low')
    monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high')
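    # a rough sketch of what each MonitorLagThread is assumed to do, with a
    # hypothetical read_latest_timestamp helper: poll the latest row on its
    # destination replica and track how far the insert timestamp lags
    # behind the current time.
    #
    #   while not self.done:
    #     ts = read_latest_timestamp(self.tablet, self.object_name)
    #     lag = time.time() - ts
    #     self.max_lag = max(self.max_lag, lag)
    #     self.lag_sum += lag
    #     self.sample_count += 1
    #     time.sleep(1)
    #
    # the max/avg lag summary is logged once the threads are stopped.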

    # tests a failover switching serving to a different replica
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
    shard_1_slave2.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
    utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias, 'replica'])

    # test data goes through again
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000, base=1000)
    logging.debug('Checking 80 percent of data was sent quickly')
    self._check_lots_timeout(1000, 80, 5, base=1000)

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    expect_fail=True)

    # check query service is off on master 2 and master 3, as filtered
    # replication is enabled. Even the health check that is enabled on
    # master 3 should not interfere (we run it to be sure).
    utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias, 'replica'],
                    auto_log=True)
    for master in [shard_2_master, shard_3_master]:
      utils.check_tablet_query_service(self, master, False, False)
      stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                            '-count', '1',
                                            master.tablet_alias])
      logging.debug('Got health: %s', str(stream_health))
      self.assertIn('realtime_stats', stream_health)
      self.assertNotIn('serving', stream_health)

    # check the destination master 3 is healthy, even though its query
    # service is not running (if it were not healthy this would raise an
    # exception)
    shard_3_master.get_healthz()

    # now serve rdonly from the split shards, in test_nj only
    utils.run_vtctl(['MigrateServedTypes', '--cells=test_nj',
                     'test_keyspace/80-', 'rdonly'], auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)
    utils.check_srv_keyspace('test_ny', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # now serve rdonly from the split shards, everywhere
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)
    utils.check_srv_keyspace('test_ny', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # then serve replica from the split shards
    destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=keyspace_id_type)
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)

    # move replica back and forth
    utils.run_vtctl(
        ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
        auto_log=True)
    # After a backwards migration, queryservice should be enabled on
    # source and disabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, True, False)
    # Destination tablets would have query service disabled for reasons
    # other than the migration, so check the shard record instead of the
    # tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, False)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type)

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    # After a forwards migration, queryservice should be disabled on
    # source and enabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)
    # Destination tablets would have query service disabled for reasons
    # other than the migration, so check the shard record instead of the
    # tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=keyspace_id_type)

    # reparent shard_2 to shard_2_replica1, then insert more data and
    # see it flow through still
    utils.run_vtctl(['PlannedReparentShard', 'test_keyspace/80-c0',
                     shard_2_replica1.tablet_alias])

    # update our test variables to point at the new master
    shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master

    logging.debug('Inserting lots of data on source shard after reparenting')
    self._insert_lots(3000, base=2000)
    logging.debug('Checking 80 percent of data was sent fairly quickly')
    self._check_lots_timeout(3000, 80, 10, base=2000)

    # use vtworker to compare the data again
    logging.debug('Running vtworker SplitDiff')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', '--exclude_tables',
                        'unrelated', 'test_keyspace/c0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    # going to migrate the master now, check the delays
    monitor_thread_1.done = True
    monitor_thread_2.done = True
    insert_thread_1.done = True
    insert_thread_2.done = True
    logging.debug('DELAY 1: %s max_lag=%d avg_lag=%d',
                  monitor_thread_1.object_name,
                  monitor_thread_1.max_lag,
                  monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
    logging.debug('DELAY 2: %s max_lag=%d avg_lag=%d',
                  monitor_thread_2.object_name,
                  monitor_thread_2.max_lag,
                  monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

    # tamper with the SourceShard records to test 'vtctl SourceShardDelete'
    # and 'vtctl SourceShardAdd'
    utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                    auto_log=True)
    utils.run_vtctl(['SourceShardAdd', '--key_range=80-',
                     'test_keyspace/c0-', '0', 'test_keyspace/80-'],
                    auto_log=True)
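    # ('0' is the uid of the SourceShard entry inside the destination shard
    # record; the delete/add pair should leave the record as it was, which
    # is the point of the round trip.)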

    # then serve master from the split shards, make sure the source master's
    # query service is now turned off
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-c0 c0-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=keyspace_id_type)
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    shard_2_master.wait_for_binlog_player_count(0)
    shard_3_master.wait_for_binlog_player_count(0)

    # get status for a destination master tablet, make sure it's good
    shard_2_master_status = shard_2_master.get_status()
    self.assertIn('No binlog player is running', shard_2_master_status)
    self.assertIn('</html>', shard_2_master_status)

    # delete the original tablets in the original shard
    tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
                         shard_1_ny_rdonly, shard_1_rdonly1])
    for t in [shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
              shard_1_rdonly1]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    utils.run_vtctl(['DeleteTablet', '-allow_master',
                     shard_1_master.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(
        ['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # test RemoveShardCell
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True,
        expect_fail=True)
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True)
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True)
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
    self.assertNotIn('cells', shard)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

    # kill everything
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                         shard_2_master, shard_2_replica1, shard_2_replica2,
                         shard_3_master, shard_3_replica, shard_3_rdonly1])
Example #45
0
  def test_cells_aliases(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'custom_ksid_col',
                     '--sharding_column_type', base_sharding.keyspace_id_type,
                     'test_keyspace'])

    shard_0_master.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
    shard_1_master.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

    # we set full_mycnf_args to True as a test in the KIT_BYTES case
    full_mycnf_args = (base_sharding.keyspace_id_type ==
                       keyrange_constants.KIT_BYTES)

    # create databases so vttablet can start behaving somewhat normally
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args,
                       binlog_use_v3_resharding_mode=False)

    # wait for the tablets (replication is not set up, they won't be healthy)
    for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
              shard_1_master, shard_1_replica, shard_1_rdonly]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    # check the shards
    shards = utils.run_vtctl_json(['FindAllShardsInKeyspace', 'test_keyspace'])
    self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards))
    self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards))
    self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards))

    # create the tables
    self._create_schema()
    self._insert_startup_values()

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # Make sure srv keyspace graph looks as expected
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    utils.check_srv_keyspace(
        'test_ny', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=base_sharding.keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    # Bootstrap vtgate

    utils.apply_vschema(vschema)

    # Add an alias so vtgate can route to replica/rdonly tablets that are
    # in a different cell but covered by the same alias.

    if use_alias:
      utils.run_vtctl(['AddCellsAlias', '-cells', 'test_nj,test_ny',
                       'region_east_coast'], auto_log=True)
      tablet_types_to_wait = 'MASTER,REPLICA'
    else:
      tablet_types_to_wait = 'MASTER'

    utils.VtGate().start(
        tablets=[shard_0_master, shard_1_master],
        tablet_types_to_wait=tablet_types_to_wait,
        cells_to_watch='test_nj,test_ny',
    )
    utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1)
    utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1)
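    # (with the alias in place, vtgate treats test_nj and test_ny as one
    # routing region, so replica/rdonly queries may be served from the
    # other cell; without it only master queries should succeed, which is
    # what the two branches below assert.)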

    vtgate_conn = self._get_connection()
    result = self._execute_on_tablet_type(
        vtgate_conn,
        'master',
        'select count(*) from test_table', {})
    self.assertEqual(
        result,
        ([(3,)], 1, 0,
         [('count(*)', self.int_type)]))

    if use_alias:
      vtgate_conn = self._get_connection()
      result = self._execute_on_tablet_type(
          vtgate_conn,
          'master',
          'select count(*) from test_table', {})
      self.assertEqual(
          result,
          ([(3,)], 1, 0,
           [('count(*)', self.int_type)]))

      vtgate_conn = self._get_connection()
      result = self._execute_on_tablet_type(
          vtgate_conn,
          'replica',
          'select count(*) from test_table', {})
      self.assertEqual(
          result,
          ([(3,)], 1, 0,
           [('count(*)', self.int_type)]))

      vtgate_conn = self._get_connection()
      result = self._execute_on_tablet_type(
          vtgate_conn,
          'rdonly',
          'select count(*) from test_table', {})
      self.assertEqual(
          result,
          ([(3,)], 1, 0,
           [('count(*)', self.int_type)]))
    else:
      vtgate_conn = self._get_connection()
      try:
        self._execute_on_tablet_type(
            vtgate_conn,
            'replica',
            'select count(*) from test_table', {})
        self.fail('Expected execute to fail, did not get error')
      except Exception as e:
        s = str(e)
        self.assertIn('80.replica, no valid tablet: node', s)

      vtgate_conn = self._get_connection()
      try:
        self._execute_on_tablet_type(
            vtgate_conn,
            'rdonly',
            'select count(*) from test_table', {})
        self.fail('Expected execute to fail, did not get error')
      except Exception as e:
        s = str(e)
        self.assertIn('80.rdonly, no valid tablet: node', s)
Example #46
0
  def test_resharding(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'bad_column',
                     '--sharding_column_type', 'bytes',
                     'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                     'custom_ksid_col', 'uint64'], expect_fail=True)
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force',
                     'test_keyspace', 'custom_ksid_col', keyspace_id_type])

    shard_0_master.init_tablet('master', 'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
    shard_1_master.init_tablet('master', 'test_keyspace', '80-')
    shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
    shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
    ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
    self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

    # we set full_mycnf_args to True as a test in the KIT_BYTES case
    full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

    # create databases so vttablet can start behaving somewhat normally
    for t in [shard_0_master, shard_0_replica, shard_0_ny_rdonly,
              shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
              shard_1_rdonly1]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args)

    # wait for the tablets (replication is not set up, the slaves won't be
    # healthy)
    shard_0_master.wait_for_vttablet_state('SERVING')
    shard_0_replica.wait_for_vttablet_state('NOT_SERVING')
    shard_0_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
    shard_1_master.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
    shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')
    shard_1_ny_rdonly.wait_for_vttablet_state('NOT_SERVING')
    shard_1_rdonly1.wait_for_vttablet_state('NOT_SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    # check the shards
    shards = utils.run_vtctl_json(['FindAllShardsInKeyspace', 'test_keyspace'])
    self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards))
    self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards))
    self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards))

    # create the tables
    self._create_schema()
    self._insert_startup_values()

    # run a health check on source replicas so they respond to discovery
    # (for binlog players) and on the source rdonlys (for workers)
    for t in [shard_0_replica, shard_1_slave1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
    for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
      utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

    # create the split shards
    shard_2_master.init_tablet('master', 'test_keyspace', '80-c0')
    shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0')
    shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0')
    shard_3_master.init_tablet('master', 'test_keyspace', 'c0-')
    shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-')
    shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    shard_2_master.start_vttablet(wait_for_state=None)
    shard_3_master.start_vttablet(wait_for_state=None)
    for t in [shard_2_replica1, shard_2_replica2,
              shard_3_replica, shard_3_rdonly1]:
      t.start_vttablet(wait_for_state=None)
    for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
              shard_3_master, shard_3_replica, shard_3_rdonly1]:
      t.wait_for_vttablet_state('NOT_SERVING')

    utils.run_vtctl(['InitShardMaster', 'test_keyspace/80-c0',
                     shard_2_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['InitShardMaster', 'test_keyspace/c0-',
                     shard_3_master.tablet_alias], auto_log=True)

    # check the shards
    shards = utils.run_vtctl_json(['FindAllShardsInKeyspace', 'test_keyspace'])
    for s in ['-80', '80-', '80-c0', 'c0-']:
      self.assertIn(s, shards, 'unexpected shards: %s' % str(shards))
    self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards))

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace(
        'test_nj', 'test_keyspace',
        'Partitions(master): -80 80-\n'
        'Partitions(rdonly): -80 80-\n'
        'Partitions(replica): -80 80-\n',
        keyspace_id_type=keyspace_id_type,
        sharding_column_name='custom_ksid_col')

    # disable shard_1_slave2, so we're sure filtered replication will go
    # from shard_1_slave1
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare'])
    shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')

    # we need to create the schema, and the worker will do data copying
    for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
      utils.run_vtctl(['CopySchemaShard', '--exclude_tables', 'unrelated',
                       shard_1_rdonly1.tablet_alias, keyspace_shard],
                      auto_log=True)

    # --max_tps is only specified to enable the throttler and ensure that the
    # code is executed. But the intent here is not to throttle the test, hence
    # the rate limit is set very high.
    utils.run_vtworker(['--cell', 'test_nj',
                        '--command_display_interval', '10ms',
                        'LegacySplitClone',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        '--max_tps', '9999',
                        'test_keyspace/80-'],
                       auto_log=True)
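    # (--min_healthy_rdonly_tablets is lowered to 1 because the test runs
    # only a single rdonly tablet per source shard; the default would wait
    # for more healthy rdonlys before cloning.)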
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias,
                     'rdonly'], auto_log=True)

    # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', '--exclude_tables=unrelated',
                     'test_keyspace'], auto_log=True)

    # check the binlog players are running and exporting vars
    self.check_destination_master(shard_2_master, ['test_keyspace/80-'])
    self.check_destination_master(shard_3_master, ['test_keyspace/80-'])

    # check that binlog server exported the stats vars
    self.check_binlog_server_vars(shard_1_slave1, horizontal=True)

    # Check that the throttler was enabled.
    self.check_throttler_service(shard_2_master.rpc_endpoint(),
                                 ['BinlogPlayer/0'], 9999)
    self.check_throttler_service(shard_3_master.rpc_endpoint(),
                                 ['BinlogPlayer/0'], 9999)
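    # (BinlogPlayer/0 is presumably the throttler name derived from the
    # source shard uid; the 9999 rate matches the --max_tps passed to the
    # worker above.)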

    # testing filtered replication: insert a bunch of data on shard 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000)
    logging.debug('Checking 80 percent of data is sent quickly')
    v = self._check_lots_timeout(1000, 80, 5)
    if v != 100:
      # small optimization: only do this check if we don't have all the data
      # already anyway.
      logging.debug('Checking all data goes through eventually')
      self._check_lots_timeout(1000, 100, 20)
    logging.debug('Checking no data was sent the wrong way')
    self._check_lots_not_present(1000)
    self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'],
                                  seconds_behind_master_max=30)
    self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'],
                                  seconds_behind_master_max=30)
    self.check_binlog_server_vars(shard_1_slave1, horizontal=True,
                                  min_statements=1000, min_transactions=1000)

    # use vtworker to compare the data (after health-checking the destination
    # rdonly tablets so discovery works)
    utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias])
    logging.debug('Running vtworker SplitDiff')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        'test_keyspace/c0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    utils.pause('Good time to test vtworker for diffs')

    # get status for destination master tablets, make sure we have it all
    self.check_running_binlog_player(shard_2_master, 4000, 2000)
    self.check_running_binlog_player(shard_3_master, 4000, 2000)

    # tests a failover switching serving to a different replica
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
    shard_1_slave2.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
    utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias])

    # test data goes through again
    logging.debug('Inserting lots of data on source shard')
    self._insert_lots(1000, base=1000)
    logging.debug('Checking 80 percent of data was sent quickly')
    self._check_lots_timeout(1000, 80, 5, base=1000)
    self.check_binlog_server_vars(shard_1_slave2, horizontal=True,
                                  min_statements=800, min_transactions=800)

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    expect_fail=True)

    # check query service is off on master 2 and master 3, as filtered
    # replication is enabled. Even the health check that is enabled on
    # master 3 should not interfere (we run it to be sure).
    utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias],
                    auto_log=True)
    for master in [shard_2_master, shard_3_master]:
      utils.check_tablet_query_service(self, master, False, False)
      stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                            '-count', '1',
                                            master.tablet_alias])
      logging.debug('Got health: %s', str(stream_health))
      self.assertIn('realtime_stats', stream_health)
      self.assertNotIn('serving', stream_health)

    # check the destination master 3 is healthy, even though its query
    # service is not running (if it were not healthy this would raise an
    # exception)
    shard_3_master.get_healthz()

    # now serve rdonly from the split shards, in test_nj only
    utils.run_vtctl(['MigrateServedTypes', '--cells=test_nj',
                     'test_keyspace/80-', 'rdonly'], auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_srv_keyspace('test_ny', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # now serve rdonly from the split shards, everywhere
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_srv_keyspace('test_ny', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
    utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
    utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

    # then serve replica from the split shards
    destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)

    # move replica back and forth
    utils.run_vtctl(
        ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
        auto_log=True)
    # After a backwards migration, queryservice should be enabled on
    # source and disabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, True, False)
    # Destination tablets would have query service disabled for reasons
    # other than the migration, so check the shard record instead of the
    # tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, False)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    # After a forwards migration, queryservice should be disabled on
    # source and enabled on destinations
    utils.check_tablet_query_service(self, shard_1_slave2, False, True)
    # Destination tablets would have query service disabled for reasons
    # other than the migration, so check the shard record instead of the
    # tablets directly.
    utils.check_shard_query_services(self, destination_shards,
                                     topodata_pb2.REPLICA, True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')

    # use vtworker to compare the data again
    logging.debug('Running vtworker SplitDiff')
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff',
                        '--exclude_tables', 'unrelated',
                        '--min_healthy_rdonly_tablets', '1',
                        'test_keyspace/c0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)

    # tamper with the SourceShard records to test 'vtctl SourceShardDelete'
    # and 'vtctl SourceShardAdd'
    utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                    auto_log=True)
    utils.run_vtctl(['SourceShardAdd', '--key_range=80-',
                     'test_keyspace/c0-', '0', 'test_keyspace/80-'],
                    auto_log=True)

    # then serve master from the split shards, make sure the source master's
    # query service is now turned off
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-c0 c0-\n'
                             'Partitions(rdonly): -80 80-c0 c0-\n'
                             'Partitions(replica): -80 80-c0 c0-\n',
                             keyspace_id_type=keyspace_id_type,
                             sharding_column_name='custom_ksid_col')
    utils.check_tablet_query_service(self, shard_1_master, False, True)

    # check the binlog players are gone now
    self.check_no_binlog_player(shard_2_master)
    self.check_no_binlog_player(shard_3_master)

    # delete the original tablets in the original shard
    tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
                         shard_1_ny_rdonly, shard_1_rdonly1])
    for t in [shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
              shard_1_rdonly1]:
      utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
    utils.run_vtctl(['DeleteTablet', '-allow_master',
                     shard_1_master.tablet_alias], auto_log=True)

    # rebuild the serving graph, all mentions of the old shards should be gone
    utils.run_vtctl(
        ['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # test RemoveShardCell
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True,
        expect_fail=True)
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True)
    utils.run_vtctl(
        ['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True)
    shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
    self.assertNotIn('cells', shard)

    # delete the original shard
    utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

    # kill everything
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                         shard_2_master, shard_2_replica1, shard_2_replica2,
                         shard_3_master, shard_3_replica, shard_3_rdonly1])
Example #47
0
    def test_resharding(self):
        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'bad_column',
            '--sharding_column_type', 'bytes', 'test_keyspace'
        ])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'custom_ksid_col',
            'uint64'
        ],
                        expect_fail=True)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', 'test_keyspace',
            'custom_ksid_col', keyspace_id_type
        ])

        shard_0_master.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
        shard_1_master.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
        shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        ks = utils.run_vtctl_json(
            ['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
        self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

        # create databases so vttablet can start behaving somewhat normally
        for t in [
                shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly1
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             full_mycnf_args=full_mycnf_args)

        # wait for the tablets (replication is not set up, they won't be
        # healthy)
        for t in [
                shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # check the shards
        shards = utils.run_vtctl_json(
            ['FindAllShardsInKeyspace', 'test_keyspace'])
        self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards))
        self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards))
        self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards))

        # create the tables
        self._create_schema()
        self._insert_startup_values()

        # run a health check on source replicas so they respond to discovery
        # (for binlog players) and on the source rdonlys (for workers)
        for t in [shard_0_replica, shard_1_slave1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
        for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
            utils.run_vtctl(['RunHealthCheck', t.tablet_alias])

        # create the split shards
        shard_2_master.init_tablet('replica', 'test_keyspace', '80-c0')
        shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0')
        shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0')
        shard_3_master.init_tablet('replica', 'test_keyspace', 'c0-')
        shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-')
        shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        shard_2_master.start_vttablet(wait_for_state=None)
        shard_3_master.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_replica1, shard_2_replica2, shard_3_replica,
                shard_3_rdonly1
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_master, shard_3_replica, shard_3_rdonly1
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/80-c0',
            shard_2_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'InitShardMaster', '-force', 'test_keyspace/c0-',
            shard_3_master.tablet_alias
        ],
                        auto_log=True)

        # check the shards
        shards = utils.run_vtctl_json(
            ['FindAllShardsInKeyspace', 'test_keyspace'])
        for s in ['-80', '80-', '80-c0', 'c0-']:
            self.assertIn(s, shards, 'unexpected shards: %s' % str(shards))
        self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards))

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')

        # disable shard_1_slave2, so we're sure filtered replication will go
        # from shard_1_slave1
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')
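        # ('spare' is not a serving tablet type, so shard_1_slave2 drops out
        # of healthcheck-based discovery and the destination binlog players
        # can only pick shard_1_slave1 as their source)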

        # we need to create the schema, and the worker will do data copying
        for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
            utils.run_vtctl([
                'CopySchemaShard', '--exclude_tables', 'unrelated',
                shard_1_rdonly1.tablet_alias, keyspace_shard
            ],
                            auto_log=True)

        # --max_tps is only specified to enable the throttler and make sure
        # that code path is exercised. The intent is not to actually throttle
        # the test, hence the rate limit is set very high.
        utils.run_vtworker([
            '--cell', 'test_nj', '--command_display_interval', '10ms',
            'LegacySplitClone', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', '--max_tps', '9999',
            'test_keyspace/80-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        # TODO(alainjobart): experiment with the dontStartBinlogPlayer option

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl([
            'ValidateSchemaKeyspace', '--exclude_tables=unrelated',
            'test_keyspace'
        ],
                        auto_log=True)

        # check the binlog players are running and exporting vars
        self.check_destination_master(shard_2_master, ['test_keyspace/80-'])
        self.check_destination_master(shard_3_master, ['test_keyspace/80-'])

        # check that binlog server exported the stats vars
        self.check_binlog_server_vars(shard_1_slave1, horizontal=True)

        # Check that the throttler was enabled.
        self.check_throttler_service(shard_2_master.rpc_endpoint(),
                                     ['BinlogPlayer/0'], 9999)
        self.check_throttler_service(shard_3_master.rpc_endpoint(),
                                     ['BinlogPlayer/0'], 9999)
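
        # An optional extra check, hedged: this assumes the ThrottlerMaxRates
        # vtctl command is available in this build. It dumps the throttler's
        # configured max rates for one destination master, for log visibility
        # only.
        utils.run_vtctl(['ThrottlerMaxRates', '-server',
                         shard_2_master.rpc_endpoint()], auto_log=True)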

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000)
        logging.debug('Checking 80 percent of data is sent quickly')
        v = self._check_lots_timeout(1000, 80, 5)
        if v != 100:
            # small optimization: only do this check if we don't have all the data
            # already anyway.
            logging.debug('Checking all data goes through eventually')
            self._check_lots_timeout(1000, 100, 20)
        logging.debug('Checking no data was sent the wrong way')
        self._check_lots_not_present(1000)
        self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'],
                                      seconds_behind_master_max=30)
        self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'],
                                      seconds_behind_master_max=30)
        self.check_binlog_server_vars(shard_1_slave1,
                                      horizontal=True,
                                      min_statements=1000,
                                      min_transactions=1000)
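
        # For context, a minimal sketch of the polling pattern behind the
        # _check_lots_timeout-style helpers used above; the name, signature
        # and sleep interval here are illustrative assumptions, not the real
        # helper from this test file.
        def _check_lots_timeout_sketch(count, percent, timeout, read_percent):
            import time
            deadline = time.time() + timeout
            while True:
                seen = read_percent(count)  # percent of rows visible so far
                if seen >= percent or time.time() > deadline:
                    return seen
                time.sleep(0.25)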

        # use vtworker to compare the data (after health-checking the destination
        # rdonly tablets so discovery works)
        utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias])
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause('Good time to test vtworker for diffs')

        # get status for destination master tablets, make sure we have it all
        self.check_running_binlog_player(shard_2_master, 4000, 2000)
        self.check_running_binlog_player(shard_3_master, 4000, 2000)

        # tests a failover switching serving to a different replica
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
        utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias])

        # test data goes through again
        logging.debug('Inserting lots of data on source shard')
        self._insert_lots(1000, base=1000)
        logging.debug('Checking 80 percent of data was sent quickly')
        self._check_lots_timeout(1000, 80, 5, base=1000)
        self.check_binlog_server_vars(shard_1_slave2,
                                      horizontal=True,
                                      min_statements=800,
                                      min_transactions=800)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        expect_fail=True)

        # check query service is off on master 2 and master 3, as filtered
        # replication is enabled. Even the health check that is enabled on
        # master 3 should not interfere (we run it to be sure).
        utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias],
                        auto_log=True)
        for master in [shard_2_master, shard_3_master]:
            utils.check_tablet_query_service(self, master, False, False)
            stream_health = utils.run_vtctl_json(
                ['VtTabletStreamHealth', '-count', '1', master.tablet_alias])
            logging.debug('Got health: %s', str(stream_health))
            self.assertIn('realtime_stats', stream_health)
            self.assertNotIn('serving', stream_health)
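            # (proto3 JSON omits fields at their default value, so the
            # absence of 'serving' here means the tablet is not serving)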

        # check the destination master 3 is healthy, even though its query
        # service is not running (if it were unhealthy, this call would raise)
        shard_3_master.get_healthz()

        # now serve rdonly from the split shards, in test_nj only
        utils.run_vtctl([
            'MigrateServedTypes', '--cells=test_nj', 'test_keyspace/80-',
            'rdonly'
        ],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')
        utils.check_srv_keyspace('test_ny',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')
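        # (the trailing booleans below are (serving, tablet_control_disabled))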
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # now serve rdonly from the split shards, everywhere
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')
        utils.check_srv_keyspace('test_ny',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
        utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
        utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)

        # then serve replica from the split shards
        destination_shards = ['test_keyspace/80-c0', 'test_keyspace/c0-']

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-c0 c0-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
            auto_log=True)
        # After a backwards migration, queryservice should be enabled on
        # source and disabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, True, False)
        # Destination tablets would have query service disabled for reasons
        # other than the migration, so check the shard record instead of
        # the tablets directly.
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, False)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')

        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        # After a forwards migration, queryservice should be disabled on
        # source and enabled on destinations
        utils.check_tablet_query_service(self, shard_1_slave2, False, True)
        # Destination tablets would have query service disabled for reasons
        # other than the migration, so check the shard record instead of
        # the tablets directly.
        utils.check_shard_query_services(self, destination_shards,
                                         topodata_pb2.REPLICA, True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-c0 c0-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')

        # use vtworker to compare the data again
        logging.debug('Running vtworker SplitDiff')
        utils.run_vtworker([
            '-cell', 'test_nj', 'SplitDiff', '--exclude_tables', 'unrelated',
            '--min_healthy_rdonly_tablets', '1', 'test_keyspace/c0-'
        ],
                           auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
            auto_log=True)

        # mess with the SourceShard records to test 'vtctl SourceShardDelete'
        # and 'vtctl SourceShardAdd'
        utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '0'],
                        auto_log=True)
        utils.run_vtctl([
            'SourceShardAdd', '--key_range=80-', 'test_keyspace/c0-', '0',
            'test_keyspace/80-'
        ],
                        auto_log=True)
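
        # Illustrative readback, an assumption rather than part of the
        # original test: the SourceShardAdd above should now be visible in
        # the shard record returned by GetShard.
        s = utils.run_vtctl_json(['GetShard', 'test_keyspace/c0-'])
        logging.debug('source_shards after re-add: %s',
                      str(s.get('source_shards')))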

        # then serve master from the split shards, make sure the source master's
        # query service is now turned off
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-c0 c0-\n'
                                 'Partitions(rdonly): -80 80-c0 c0-\n'
                                 'Partitions(replica): -80 80-c0 c0-\n',
                                 keyspace_id_type=keyspace_id_type,
                                 sharding_column_name='custom_ksid_col')
        utils.check_tablet_query_service(self, shard_1_master, False, True)

        # check the binlog players are gone now
        self.check_no_binlog_player(shard_2_master)
        self.check_no_binlog_player(shard_3_master)

        # delete the original tablets in the original shard
        tablet.kill_tablets([
            shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
            shard_1_rdonly1
        ])
        for t in [
                shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
                shard_1_rdonly1
        ]:
            utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
        utils.run_vtctl(
            ['DeleteTablet', '-allow_master', shard_1_master.tablet_alias],
            auto_log=True)

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                        auto_log=True,
                        expect_fail=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                        auto_log=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_ny'],
                        auto_log=True)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
        self.assertNotIn('cells', shard)

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

        # kill everything
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_0_ny_rdonly, shard_2_master,
            shard_2_replica1, shard_2_replica2, shard_3_master,
            shard_3_replica, shard_3_rdonly1
        ])
Example #48
0
    def test_resharding(self):
        utils.run_vtctl([
            'CreateKeyspace', '--sharding_column_name', 'bad_column',
            '--sharding_column_type', 'bytes', 'test_keyspace'
        ])
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', 'test_keyspace', 'keyspace_id', 'uint64'
        ],
                        expect_fail=True)
        utils.run_vtctl([
            'SetKeyspaceShardingInfo', '-force', 'test_keyspace',
            'keyspace_id', keyspace_id_type
        ])

        shard_0_master.init_tablet('master', 'test_keyspace', '-80')
        shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
        shard_1_master.init_tablet('master', 'test_keyspace', '80-')
        shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
        shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
        shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # we set full_mycnf_args to True as a test in the KIT_BYTES case
        full_mycnf_args = keyspace_id_type == keyrange_constants.KIT_BYTES

        # create databases so vttablet can start behaving normally
        for t in [
                shard_0_master, shard_0_replica, shard_1_master,
                shard_1_slave1, shard_1_slave2, shard_1_rdonly
        ]:
            t.create_db('vt_test_keyspace')
            t.start_vttablet(wait_for_state=None,
                             full_mycnf_args=full_mycnf_args)

        # wait for the tablets
        shard_0_master.wait_for_vttablet_state('SERVING')
        shard_0_replica.wait_for_vttablet_state('SERVING')
        shard_1_master.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('SERVING')
        shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
        shard_1_rdonly.wait_for_vttablet_state('SERVING')

        # reparent to make the tablets work
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/-80',
            shard_0_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-',
            shard_1_master.tablet_alias
        ],
                        auto_log=True)

        # create the tables
        self._create_schema()
        self._insert_startup_values()

        # create the split shards
        shard_2_master.init_tablet('master', 'test_keyspace', '80-C0')
        shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-C0')
        shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-C0')
        shard_3_master.init_tablet('master', 'test_keyspace', 'C0-')
        shard_3_replica.init_tablet('spare', 'test_keyspace', 'C0-')
        shard_3_rdonly.init_tablet('rdonly', 'test_keyspace', 'C0-')

        # start vttablet on the split shards (no db created,
        # so they're all not serving)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_master, shard_3_replica, shard_3_rdonly
        ]:
            t.start_vttablet(wait_for_state=None)
        for t in [
                shard_2_master, shard_2_replica1, shard_2_replica2,
                shard_3_master, shard_3_replica, shard_3_rdonly
        ]:
            t.wait_for_vttablet_state('NOT_SERVING')

        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/80-C0',
            shard_2_master.tablet_alias
        ],
                        auto_log=True)
        utils.run_vtctl([
            'ReparentShard', '-force', 'test_keyspace/C0-',
            shard_3_master.tablet_alias
        ],
                        auto_log=True)

        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # take the snapshot for the split
        utils.run_vtctl(
            ['MultiSnapshot', '--spec=80-C0-', shard_1_slave1.tablet_alias],
            auto_log=True)
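        # (the --spec value lists keyrange boundaries: '80-C0-' describes
        # splitting the 80- source keyrange into 80-C0 and C0-)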

        # the snapshot_copy hook will copy the snapshot files to
        # VTDATAROOT/tmp/... as a test. We want to use these for one half,
        # but not for the other, so we test both scenarios.
        os.unlink(
            os.path.join(
                environment.tmproot, "snapshot-from-%s-for-%s.tar" %
                (shard_1_slave1.tablet_alias, "80-C0")))

        # wait for tablet's binlog server service to be enabled after snapshot
        shard_1_slave1.wait_for_binlog_server_state("Enabled")

        # perform the restores: the first one is from the source tablet. We
        # removed the storage backup, so the data comes from the tablet itself.
        utils.run_vtctl([
            'ShardMultiRestore', '-strategy=populateBlpCheckpoint',
            'test_keyspace/80-C0', shard_1_slave1.tablet_alias
        ],
                        auto_log=True)

        # the second restore is from storage: to be sure, we stop vttablet
        # and restart it afterwards
        shard_1_slave1.kill_vttablet()
        utils.run_vtctl([
            'ShardMultiRestore', '-strategy=populateBlpCheckpoint',
            'test_keyspace/C0-', shard_1_slave1.tablet_alias
        ],
                        auto_log=True)
        shard_1_slave1.start_vttablet(wait_for_state=None)
        shard_1_slave1.wait_for_binlog_server_state("Enabled")

        # check the startup values are in the right place
        self._check_startup_values()

        # check the schema too
        utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'],
                        auto_log=True)

        # check the binlog players are running
        shard_2_master.wait_for_binlog_player_count(1)
        shard_3_master.wait_for_binlog_player_count(1)

        # check that binlog server exported the stats vars
        self._check_binlog_server_vars(shard_1_slave1)

        # testing filtered replication: insert a bunch of data on shard 1,
        # check we get most of it after a few seconds, wait for binlog server
        # timeout, check we get all of it.
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000)
        logging.debug("Checking 80 percent of data is sent quickly")
        self._check_lots_timeout(1000, 80, 5)
        logging.debug("Checking all data goes through eventually")
        self._check_lots_timeout(1000, 100, 20)
        logging.debug("Checking no data was sent the wrong way")
        self._check_lots_not_present(1000)

        # use the vtworker checker to compare the data
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        utils.pause("Good time to test vtworker for diffs")

        # get status for a destination master tablet, make sure we have it all
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('Binlog player state: Running', shard_2_master_status)
        self.assertIn(
            '<td><b>All</b>: 6000<br><b>Query</b>: 4000<br><b>Transaction</b>: 2000<br></td>',
            shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # start threads to insert data into shard_1 in the background
        # with the current time, and monitor the replication delay
        insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000,
                                       0x9000000000000000)
        insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001,
                                       0xD000000000000000)
        monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
        monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")
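
        # A small self-contained sketch, for illustration only (not a helper
        # from this test): how the uint64 keyspace_ids above map onto the
        # shard ranges. Shard names are hex bounds over the top bits of the
        # id, so 0x90... lands in 80-C0 and 0xD0... lands in C0-.
        def _shard_for_keyspace_id(ksid):
            if ksid < 0x8000000000000000:
                return '-80'
            if ksid < 0xC000000000000000:
                return '80-C0'  # "insert_low" (0x90...) lands here
            return 'C0-'  # "insert_high" (0xD0...) lands here

        assert _shard_for_keyspace_id(0x9000000000000000) == '80-C0'
        assert _shard_for_keyspace_id(0xD000000000000000) == 'C0-'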

        # tests a failover switching serving to a different replica
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
        shard_1_slave2.wait_for_vttablet_state('SERVING')
        shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')

        # test data goes through again
        logging.debug("Inserting lots of data on source shard")
        self._insert_lots(1000, base=1000)
        logging.debug("Checking 80 percent of data was sent quickly")
        self._check_lots_timeout(1000, 80, 5, base=1000)

        # check we can't migrate the master just yet
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        expect_fail=True)

        # now serve rdonly from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-C0 C0-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # then serve replica from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-C0 C0-\n' +
                                 'Partitions(replica): -80 80-C0 C0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # move replica back and forth
        utils.run_vtctl(
            ['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
            auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-C0 C0-\n' +
                                 'Partitions(replica): -80 80-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-\n' +
                                 'Partitions(rdonly): -80 80-C0 C0-\n' +
                                 'Partitions(replica): -80 80-C0 C0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # reparent shard_2 to shard_2_replica1, then insert more data and
        # see it flow through still
        utils.run_vtctl([
            'ReparentShard', 'test_keyspace/80-C0',
            shard_2_replica1.tablet_alias
        ])
        logging.debug(
            "Inserting lots of data on source shard after reparenting")
        self._insert_lots(3000, base=2000)
        logging.debug("Checking 80 percent of data was sent fairly quickly")
        self._check_lots_timeout(3000, 80, 10, base=2000)

        # use the vtworker checker to compare the data again
        logging.debug("Running vtworker SplitDiff")
        utils.run_vtworker(
            ['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)
        utils.run_vtctl(
            ['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
            auto_log=True)

        # going to migrate the master now, check the delays
        monitor_thread_1.done = True
        monitor_thread_2.done = True
        insert_thread_1.done = True
        insert_thread_2.done = True
        logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u",
                      monitor_thread_1.object_name, monitor_thread_1.max_lag,
                      monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
        logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u",
                      monitor_thread_2.object_name, monitor_thread_2.max_lag,
                      monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

        # then serve master from the split shards
        utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                        auto_log=True)
        utils.check_srv_keyspace('test_nj',
                                 'test_keyspace',
                                 'Partitions(master): -80 80-C0 C0-\n' +
                                 'Partitions(rdonly): -80 80-C0 C0-\n' +
                                 'Partitions(replica): -80 80-C0 C0-\n' +
                                 'TabletTypes: master,rdonly,replica',
                                 keyspace_id_type=keyspace_id_type)

        # check the binlog players are gone now
        shard_2_master.wait_for_binlog_player_count(0)
        shard_3_master.wait_for_binlog_player_count(0)

        # get status for a destination master tablet, make sure it's good
        shard_2_master_status = shard_2_master.get_status()
        self.assertIn('No binlog player is running', shard_2_master_status)
        self.assertIn('</html>', shard_2_master_status)

        # scrap the original tablets in the original shard
        for t in [
                shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_rdonly
        ]:
            utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
        tablet.kill_tablets(
            [shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_rdonly])

        # rebuild the serving graph, all mentions of the old shards should be gone
        utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                        auto_log=True)

        # test RemoveShardCell
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/-80', 'test_nj'],
                        auto_log=True,
                        expect_fail=True)
        utils.run_vtctl(['RemoveShardCell', 'test_keyspace/80-', 'test_nj'],
                        auto_log=True)
        shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
        if shard['Cells']:
            self.fail("Non-empty Cells record for shard: %s" % str(shard))

        # delete the original shard
        utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)

        # kill everything
        tablet.kill_tablets([
            shard_0_master, shard_0_replica, shard_2_master, shard_2_replica1,
            shard_2_replica2, shard_3_master, shard_3_replica, shard_3_rdonly
        ])
Example #49
0
  def test_resharding(self):
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'bad_column',
                     '--sharding_column_type', 'bytes',
                     'test_keyspace'])
    utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
                     'keyspace_id', 'uint64'], expect_fail=True)
    utils.run_vtctl(['SetKeyspaceShardingInfo', '-force', 'test_keyspace',
                     'keyspace_id', keyspace_id_type])

    shard_0_master.init_tablet('master', 'test_keyspace', '-80')
    shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
    shard_1_master.init_tablet('master', 'test_keyspace', '80-')
    shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
    shard_1_slave2.init_tablet('spare', 'test_keyspace', '80-')
    shard_1_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')

    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

    # create databases so vttablet can start behaving normally
    for t in [shard_0_master, shard_0_replica, shard_1_master, shard_1_slave1,
              shard_1_slave2, shard_1_rdonly]:
      t.create_db('vt_test_keyspace')
      t.start_vttablet(wait_for_state=None)

    # wait for the tablets
    shard_0_master.wait_for_vttablet_state('SERVING')
    shard_0_replica.wait_for_vttablet_state('SERVING')
    shard_1_master.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('SERVING')
    shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')  # spare
    shard_1_rdonly.wait_for_vttablet_state('SERVING')

    # reparent to make the tablets work
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
                     shard_0_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-',
                     shard_1_master.tablet_alias], auto_log=True)

    # create the tables
    self._create_schema()
    self._insert_startup_values()

    # create the split shards
    shard_2_master.init_tablet('master', 'test_keyspace', '80-C0')
    shard_2_replica1.init_tablet('spare', 'test_keyspace', '80-C0')
    shard_2_replica2.init_tablet('spare', 'test_keyspace', '80-C0')
    shard_3_master.init_tablet('master', 'test_keyspace', 'C0-')
    shard_3_replica.init_tablet('spare', 'test_keyspace', 'C0-')
    shard_3_rdonly.init_tablet('rdonly', 'test_keyspace', 'C0-')

    # start vttablet on the split shards (no db created,
    # so they're all not serving)
    for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
              shard_3_master, shard_3_replica, shard_3_rdonly]:
      t.start_vttablet(wait_for_state=None)
    shard_2_master.wait_for_vttablet_state('CONNECTING')
    shard_2_replica1.wait_for_vttablet_state('NOT_SERVING')
    shard_2_replica2.wait_for_vttablet_state('NOT_SERVING')
    shard_3_master.wait_for_vttablet_state('CONNECTING')
    shard_3_replica.wait_for_vttablet_state('NOT_SERVING')
    shard_3_rdonly.wait_for_vttablet_state('CONNECTING')

    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/80-C0',
                     shard_2_master.tablet_alias], auto_log=True)
    utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/C0-',
                     shard_3_master.tablet_alias], auto_log=True)

    utils.run_vtctl(['RebuildKeyspaceGraph', '-use-served-types', 'test_keyspace'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica')

    # take the snapshot for the split
    utils.run_vtctl(['MultiSnapshot', '--spec=80-C0-',
                     shard_1_slave1.tablet_alias], auto_log=True)

    # wait for the tablet's binlog server service to be enabled after the
    # snapshot
    shard_1_slave1.wait_for_binlog_server_state("Enabled")

    # perform the restore.
    utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                     'test_keyspace/80-C0', shard_1_slave1.tablet_alias],
                    auto_log=True)
    utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
                     'test_keyspace/C0-', shard_1_slave1.tablet_alias],
                    auto_log=True)

    # check the startup values are in the right place
    self._check_startup_values()

    # check the schema too
    utils.run_vtctl(['ValidateSchemaKeyspace', 'test_keyspace'], auto_log=True)

    # check the binlog players are running
    shard_2_master.wait_for_binlog_player_count(1)
    shard_3_master.wait_for_binlog_player_count(1)

    # check that binlog server exported the stats vars
    self._check_binlog_server_vars(shard_1_slave1)

    # testing filtered replication: insert a bunch of data on shard 1,
    # check we get most of it after a few seconds, wait for binlog server
    # timeout, check we get all of it.
    logging.debug("Inserting lots of data on source shard")
    self._insert_lots(1000)
    logging.debug("Checking 80 percent of data is sent quickly")
    self._check_lots_timeout(1000, 80, 5)
    logging.debug("Checking all data goes through eventually")
    self._check_lots_timeout(1000, 100, 20)
    logging.debug("Checking no data was sent the wrong way")
    self._check_lots_not_present(1000)

    # use the vtworker checker to compare the data
    logging.debug("Running vtworker SplitDiff")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)

    utils.pause("Good time to test vtworker for diffs")

    # start threads to insert data into shard_1 in the background
    # with the current time, and monitor the replication delay
    insert_thread_1 = InsertThread(shard_1_master, "insert_low", 10000,
                                   0x9000000000000000)
    insert_thread_2 = InsertThread(shard_1_master, "insert_high", 10001,
                                   0xD000000000000000)
    monitor_thread_1 = MonitorLagThread(shard_2_replica2, "insert_low")
    monitor_thread_2 = MonitorLagThread(shard_3_replica, "insert_high")

    # tests a failover switching serving to a different replica
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
    utils.run_vtctl(['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
    shard_1_slave2.wait_for_vttablet_state('SERVING')
    shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')

    # test data goes through again
    logging.debug("Inserting lots of data on source shard")
    self._insert_lots(1000, base=1000)
    logging.debug("Checking 80 percent of data was sent quickly")
    self._check_lots_timeout(1000, 80, 5, base=1000)

    # check we can't migrate the master just yet
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    expect_fail=True)

    # now serve rdonly from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica')

    # then serve replica from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-C0 C0-\n' +
                             'TabletTypes: master,rdonly,replica')

    # move replica back and forth
    utils.run_vtctl(['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-\n' +
                             'TabletTypes: master,rdonly,replica')
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-C0 C0-\n' +
                             'TabletTypes: master,rdonly,replica')

    # reparent shard_2 to shard_2_replica1, then insert more data and
    # see it flow through still
    utils.run_vtctl(['ReparentShard', 'test_keyspace/80-C0',
                    shard_2_replica1.tablet_alias])
    logging.debug("Inserting lots of data on source shard after reparenting")
    self._insert_lots(3000, base=2000)
    logging.debug("Checking 80 percent of data was sent fairly quickly")
    self._check_lots_timeout(3000, 80, 10, base=2000)

    # use the vtworker checker to compare the data again
    logging.debug("Running vtworker SplitDiff")
    utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/C0-'],
                       auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)
    utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly.tablet_alias, 'rdonly'],
                    auto_log=True)

    # going to migrate the master now, check the delays
    monitor_thread_1.done = True
    monitor_thread_2.done = True
    insert_thread_1.done = True
    insert_thread_2.done = True
    logging.debug("DELAY 1: %s max_lag=%u avg_lag=%u",
                  monitor_thread_1.object_name,
                  monitor_thread_1.max_lag,
                  monitor_thread_1.lag_sum / monitor_thread_1.sample_count)
    logging.debug("DELAY 2: %s max_lag=%u avg_lag=%u",
                  monitor_thread_2.object_name,
                  monitor_thread_2.max_lag,
                  monitor_thread_2.lag_sum / monitor_thread_2.sample_count)

    # then serve master from the split shards
    utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
                    auto_log=True)
    utils.check_srv_keyspace('test_nj', 'test_keyspace',
                             'Partitions(master): -80 80-C0 C0-\n' +
                             'Partitions(rdonly): -80 80-C0 C0-\n' +
                             'Partitions(replica): -80 80-C0 C0-\n' +
                             'TabletTypes: master,rdonly,replica')

    # check the binlog players are gone now
    shard_2_master.wait_for_binlog_player_count(0)
    shard_3_master.wait_for_binlog_player_count(0)

    # kill everything
    tablet.kill_tablets([shard_0_master, shard_0_replica, shard_1_master,
                         shard_1_slave1, shard_1_slave2, shard_1_rdonly,
                         shard_2_master, shard_2_replica1, shard_2_replica2,
                         shard_3_master, shard_3_replica, shard_3_rdonly])
Example #50
0
    def verify(self):
        self.assert_shard_data_equal(0, worker.shard_master,
                                     worker.shard_0_tablets.replica)
        self.assert_shard_data_equal(1, worker.shard_master,
                                     worker.shard_1_tablets.replica)

        # Verify effect of MigrateServedTypes. Dest shards are serving now.
        utils.check_srv_keyspace(
            'test_nj', self.KEYSPACE, 'Partitions(master): -80 80-\n'
            'Partitions(rdonly): -80 80-\n'
            'Partitions(replica): -80 80-\n')

        # Check that query service is disabled (source shard) or enabled (dest).

        # The 'rdonly' tablet requires an explicit healthcheck first because
        # the following sequence of events is happening in this test:
        # - SplitDiff returns 'rdonly' as 'spare' tablet (NOT_SERVING)
        # - MigrateServedTypes runs and does not refresh the 'spare' tablet
        #   (still NOT_SERVING)
        #   Shard_TabletControl.DisableQueryService=true will be set in the topology
        # - explicit or periodic healthcheck runs:
        #   a) tablet seen as caught up, change type from 'spare' to 'rdonly'
        #      (change to SERVING)
        #   b) post-action callback agent.refreshTablet() reads the topology
        #      and finds out that DisableQueryService=true is set.
        #      (change to NOT_SERVING)
        #
        # We must run an explicit healthcheck or we may see one of two states:
        # - NOT_SERVING, DisableQueryService=false, tablet type 'spare'
        #   (immediately after SplitDiff returned)
        # - SERVING, DisableQueryService=false, tablet type 'rdonly'
        #   (during healthcheck before post-action callback is called)
        utils.run_vtctl(['RunHealthCheck', worker.shard_rdonly1.tablet_alias],
                        auto_log=True)
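
        # A hedged spot-check, illustrative rather than part of the original
        # verify(): read one streaming health record to confirm the
        # healthcheck above settled the tablet's state. VtTabletStreamHealth
        # is used the same way elsewhere in this suite.
        stream_health = utils.run_vtctl_json([
            'VtTabletStreamHealth', '-count', '1',
            worker.shard_rdonly1.tablet_alias
        ])
        logging.debug('Post-healthcheck health: %s', str(stream_health))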

        # source shard: query service must be disabled after MigrateServedTypes.
        utils.check_tablet_query_service(self,
                                         worker.shard_rdonly1,
                                         serving=False,
                                         tablet_control_disabled=True)
        utils.check_tablet_query_service(self,
                                         worker.shard_replica,
                                         serving=False,
                                         tablet_control_disabled=True)
        utils.check_tablet_query_service(self,
                                         worker.shard_master,
                                         serving=False,
                                         tablet_control_disabled=True)

        # dest shard -80: query service must be enabled after MigrateServedTypes.
        # Run explicit healthcheck because 'rdonly' tablet may still be 'spare'.
        utils.run_vtctl(
            ['RunHealthCheck', worker.shard_0_rdonly1.tablet_alias],
            auto_log=True)
        utils.check_tablet_query_service(self,
                                         worker.shard_0_rdonly1,
                                         serving=True,
                                         tablet_control_disabled=False)
        utils.check_tablet_query_service(self,
                                         worker.shard_0_replica,
                                         serving=True,
                                         tablet_control_disabled=False)
        utils.check_tablet_query_service(self,
                                         worker.shard_0_master,
                                         serving=True,
                                         tablet_control_disabled=False)

        # dest shard 80-: query service must be enabled after MigrateServedTypes.
        # Run explicit healthcheck because 'rdonly' tablet is still 'spare'.
        utils.run_vtctl(
            ['RunHealthCheck', worker.shard_1_rdonly1.tablet_alias],
            auto_log=True)
        utils.check_tablet_query_service(self,
                                         worker.shard_1_rdonly1,
                                         serving=True,
                                         tablet_control_disabled=False)
        utils.check_tablet_query_service(self,
                                         worker.shard_1_replica,
                                         serving=True,
                                         tablet_control_disabled=False)
        utils.check_tablet_query_service(self,
                                         worker.shard_1_master,
                                         serving=True,
                                         tablet_control_disabled=False)