Ejemplo n.º 1
0
    def implicit_reparent(self, keyspace, shard, num_shards):
        """Performs an implicit reparent.

        Restarts the task of the shard's current master through
        self.env.restart_mysql_task, then polls the environment until a
        different tablet is reported as master. The test fails if the restart
        returns a non-zero code or if the master has not changed within
        self.reparent_timeout_threshold seconds.

        Args:
          keyspace: Name of the keyspace to reparent (string)
          shard: Numeric ID of the shard to reparent (zero based int)
          num_shards: Total number of shards (int)
        """
        shard_name = sharding_utils.get_shard_name(shard, num_shards)

        original_master_name = self.env.get_current_master_name(keyspace, shard_name)
        original_master_cell = self.env.get_tablet_cell(original_master_name)
        master_task_num = self.env.get_tablet_task_number(original_master_name)

        logging.info(
            "Restarting %s/%s, current master: %s, task: %d",
            keyspace,
            shard_name,
            original_master_name,
            master_task_num,
        )
        ret_val = self.env.restart_mysql_task(
            original_master_cell, keyspace, shard, master_task_num, "replica", "mysql-alloc", True
        )

        self.assertEqual(ret_val, 0, msg="restartalloc failed (returned %d)" % ret_val)

        # Seed with the original master so that, if the polling loop never
        # runs (e.g. a non-positive timeout), the assertion below reports a
        # regular test failure instead of raising NameError.
        new_master_name = original_master_name
        start_time = time.time()
        while time.time() - start_time < self.reparent_timeout_threshold:
            new_master_name = self.env.get_current_master_name(keyspace, shard_name)
            if new_master_name != original_master_name:
                break
            # Poll once per second until the master changes or we time out.
            time.sleep(1)
        self.assertNotEqual(
            new_master_name,
            original_master_name,
            msg="Expected master tablet to change, but it remained as %s" % (new_master_name),
        )

        # The task number is only needed for the final log line, so it is
        # looked up once after the new master is known.
        new_master_task_num = self.env.get_tablet_task_number(new_master_name)
        logging.info(
            "restartalloc on %s/%s resulted in new master: %s, task: %d",
            keyspace,
            shard_name,
            new_master_name,
            new_master_task_num,
        )
Ejemplo n.º 2
0
    def implicit_reparent(self, keyspace, shard, num_shards):
        """Performs an implicit reparent.

        Restarts the task of the shard's current master through
        self.env.restart_mysql_task, then polls the environment until a
        different tablet is reported as master. The test fails if the restart
        returns a non-zero code or if the master has not changed within
        self.reparent_timeout_threshold seconds.

        Args:
          keyspace: Name of the keyspace to reparent (string)
          shard: Numeric ID of the shard to reparent (zero based int)
          num_shards: Total number of shards (int)
        """
        shard_name = sharding_utils.get_shard_name(shard, num_shards)

        original_master_name = self.env.get_current_master_name(
            keyspace, shard_name)
        original_master_cell = self.env.get_tablet_cell(original_master_name)
        master_task_num = self.env.get_tablet_task_number(original_master_name)

        logging.info('Restarting %s/%s, current master: %s, task: %d',
                     keyspace, shard_name, original_master_name,
                     master_task_num)
        ret_val = self.env.restart_mysql_task(original_master_cell, keyspace,
                                              shard, master_task_num,
                                              'replica', 'mysql-alloc', True)

        self.assertEqual(ret_val,
                         0,
                         msg='restartalloc failed (returned %d)' % ret_val)

        # Seed with the original master so that, if the polling loop never
        # runs (e.g. a non-positive timeout), the assertion below reports a
        # regular test failure instead of raising NameError.
        new_master_name = original_master_name
        start_time = time.time()
        while time.time() - start_time < self.reparent_timeout_threshold:
            new_master_name = self.env.get_current_master_name(
                keyspace, shard_name)
            if new_master_name != original_master_name:
                break
            # Poll once per second until the master changes or we time out.
            time.sleep(1)
        self.assertNotEqual(
            new_master_name,
            original_master_name,
            msg='Expected master tablet to change, but it remained as %s' %
            (new_master_name))

        # The task number is only needed for the final log line, so it is
        # looked up once after the new master is known.
        new_master_task_num = self.env.get_tablet_task_number(new_master_name)
        logging.info(
            'restartalloc on %s/%s resulted in new master: %s, task: %d',
            keyspace, shard_name, new_master_name, new_master_task_num)
Ejemplo n.º 3
0
    def explicit_reparent(self,
                          keyspace,
                          num_shards,
                          external=False,
                          cross_cell=False):
        """Performs an explicit reparent.

        For each shard in turn, this selects the next master via
        self.env.get_next_master, issues the reparent on a background thread,
        and polls VtTabletStreamHealth on the selected tablet until it reports
        the MASTER tablet type. The test is failed via self.fail if that does
        not happen within self.reparent_timeout_threshold seconds.

        Args:
          keyspace: Name of the keyspace to reparent (string)
          num_shards: Total number of shards (int)
          external: If true, reparent through self.env.external_reparent;
            otherwise through self.env.internal_reparent (bool)
          cross_cell: Whether to reparent to a different cell; passed through
            to self.env.get_next_master (bool)

        Returns:
          How long we waited for each reparent, as a list of floats (seconds)
          with one entry per shard, in shard order. The time begins just
          before the reparent thread is started.
        """

        # Defined once, outside the shard loop: everything shard-specific is
        # passed in as an argument, so the function itself is loop-invariant.
        # NOTE(review): next_master is indexed positionally; [2] is used as
        # the tablet name, [1] as the new task number, and [0] is presumably
        # the cell -- confirm against self.env.get_next_master.
        def reparent_shard(shard, shard_name, original_master, next_master):
            logging.info('Reparenting %s/%s from %s to %s', keyspace,
                         shard_name, original_master, next_master[2])
            if external:
                return_code, return_output = self.env.external_reparent(
                    keyspace,
                    next_master[0],
                    shard,
                    new_task_num=next_master[1])
            else:
                return_code, return_output = self.env.internal_reparent(
                    keyspace, shard_name, next_master[2])
            logging.info('Reparent returned %d for %s/%s: %s', return_code,
                         keyspace, shard_name, return_output)

        durations = []

        for shard in range(num_shards):
            shard_name = sharding_utils.get_shard_name(shard, num_shards)
            original_master = self.env.get_current_master_name(
                keyspace, shard_name)

            next_master = self.env.get_next_master(keyspace, shard_name,
                                                   cross_cell)

            self.env.wait_for_good_failover_status(keyspace, shard_name)

            # Call Reparent in a separate thread so that this thread can
            # measure how long the tablet takes to become master.
            thread = threading.Thread(
                target=reparent_shard,
                args=[shard, shard_name, original_master, next_master])
            start_time = time.time()
            thread.start()

            # Wait for the reparent.
            while time.time() - start_time < self.reparent_timeout_threshold:
                try:
                    tablet_health = json.loads(
                        self.env.vtctl_helper.execute_vtctl_command(
                            ['VtTabletStreamHealth', next_master[2]]))
                    if tablet_health['target'][
                            'tablet_type'] == topodata_pb2.MASTER:
                        duration = time.time() - start_time
                        durations.append(duration)
                        logging.info('Reparent took %f seconds', duration)
                        break
                except (IndexError, KeyError, vtctl_helper.VtctlClientError):
                    # Best effort: the health query may fail or lack the
                    # expected fields mid-reparent; keep polling.
                    pass
            else:
                # The while condition expired without a break: timed out.
                self.fail('Timed out waiting for reparent on %s/%s' %
                          (keyspace, shard_name))

            thread.join()

        return durations
Ejemplo n.º 4
0
    def explicit_reparent(self, keyspace, num_shards, external=False, cross_cell=False):
        """Performs an explicit reparent.

        For each shard in turn, this selects the next master via
        self.env.get_next_master, issues the reparent on a background thread,
        and polls VtTabletStreamHealth on the selected tablet until it reports
        the MASTER tablet type. The test is failed via self.fail if that does
        not happen within self.reparent_timeout_threshold seconds.

        Args:
          keyspace: Name of the keyspace to reparent (string)
          num_shards: Total number of shards (int)
          external: If true, reparent through self.env.external_reparent;
            otherwise through self.env.internal_reparent (bool)
          cross_cell: Whether to reparent to a different cell; passed through
            to self.env.get_next_master (bool)

        Returns:
          How long we waited for each reparent, as a list of floats (seconds)
          with one entry per shard, in shard order. The time begins just
          before the reparent thread is started.
        """

        # Defined once, outside the shard loop: everything shard-specific is
        # passed in as an argument, so the function itself is loop-invariant.
        # NOTE(review): next_master is indexed positionally; [2] is used as
        # the tablet name, [1] as the new task number, and [0] is presumably
        # the cell -- confirm against self.env.get_next_master.
        def reparent_shard(shard, shard_name, original_master, next_master):
            logging.info("Reparenting %s/%s from %s to %s", keyspace, shard_name, original_master, next_master[2])
            if external:
                return_code, return_output = self.env.external_reparent(
                    keyspace, next_master[0], shard, new_task_num=next_master[1]
                )
            else:
                return_code, return_output = self.env.internal_reparent(keyspace, shard_name, next_master[2])
            logging.info("Reparent returned %d for %s/%s: %s", return_code, keyspace, shard_name, return_output)

        durations = []

        for shard in range(num_shards):
            shard_name = sharding_utils.get_shard_name(shard, num_shards)
            original_master = self.env.get_current_master_name(keyspace, shard_name)

            next_master = self.env.get_next_master(keyspace, shard_name, cross_cell)

            self.env.wait_for_good_failover_status(keyspace, shard_name)

            # Call Reparent in a separate thread so that this thread can
            # measure how long the tablet takes to become master.
            thread = threading.Thread(target=reparent_shard, args=[shard, shard_name, original_master, next_master])
            start_time = time.time()
            thread.start()

            # Wait for the reparent.
            while time.time() - start_time < self.reparent_timeout_threshold:
                try:
                    tablet_health = json.loads(
                        self.env.vtctl_helper.execute_vtctl_command(["VtTabletStreamHealth", next_master[2]])
                    )
                    if tablet_health["target"]["tablet_type"] == topodata_pb2.MASTER:
                        duration = time.time() - start_time
                        durations.append(duration)
                        logging.info("Reparent took %f seconds", duration)
                        break
                except (IndexError, KeyError, vtctl_helper.VtctlClientError):
                    # Best effort: the health query may fail or lack the
                    # expected fields mid-reparent; keep polling.
                    pass
            else:
                # The while condition expired without a break: timed out.
                self.fail("Timed out waiting for reparent on %s/%s" % (keyspace, shard_name))

            thread.join()

        return durations