def internal_reparent(self, keyspace, new_cell, shard, num_shards,
                      new_task_num, emergency=False):
  shard_name = utils.get_shard_name(shard, num_shards)
  cell_number = self.cells.index(new_cell) + 1
  new_master = '%s-%02d00000%d%02d' % (
      new_cell, cell_number, shard + 1, new_task_num)
  reparent_command = (
      'EmergencyReparentShard' if emergency else 'PlannedReparentShard')
  self.vtctl_helper.execute_vtctl_command(
      [reparent_command, '%s/%s' % (keyspace, shard_name), new_master])
  self.vtctl_helper.execute_vtctl_command(['RebuildKeyspaceGraph', keyspace])
  return 0, 'No output'
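
A hedged usage sketch: 'env' below stands in for whatever test-environment object provides .cells and .vtctl_helper, and the keyspace and cell names are placeholders, not values from this listing.

# Hypothetical call sites for internal_reparent; make_test_env is an
# illustrative factory, not part of the original code.
env = make_test_env()
# Graceful handover of shard 0 of 2 to task 1 in cell 'test':
env.internal_reparent('test_keyspace', 'test', 0, 2, new_task_num=1)
# Forced failover when the old master is presumed unreachable:
env.internal_reparent('test_keyspace', 'test', 0, 2, new_task_num=1,
                      emergency=True)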
Example #2
    def implicit_reparent(self, keyspace, shard, num_shards):
        """Performs an implicit reparent.

        This function will call borg restart on the current master task and
        verify that decider selected a new task to be the master.

        Args:
          keyspace: Name of the keyspace to reparent (string)
          shard: Numeric ID of the shard to reparent (zero based int)
          num_shards: Total number of shards (int)
        """

        shard_name = utils.get_shard_name(shard, num_shards)

        original_master_name = (self.env.get_current_master_name(
            keyspace, shard_name))
        original_master_cell = self.env.get_tablet_cell(original_master_name)
        master_task_num = self.env.get_tablet_task_number(original_master_name)

        logging.info('Restarting %s/%s, current master: %s, task: %d',
                     keyspace, shard_name, original_master_name,
                     master_task_num)
        ret_val = self.env.restart_mysql_task(original_master_cell, keyspace,
                                              shard, master_task_num,
                                              'replica', 'mysql-alloc', True)

        self.assertEqual(
            ret_val, 0, msg='restartalloc failed (returned %d)' % ret_val)

        start_time = time.time()
        while time.time() - start_time < self.reparent_timeout_threshold:
            new_master_name = self.env.get_current_master_name(
                keyspace, shard_name)
            new_master_task_num = self.env.get_tablet_task_number(
                new_master_name)
            if new_master_name != original_master_name:
                break
            time.sleep(1)
        self.assertNotEqual(
            new_master_name,
            original_master_name,
            msg='Expected master tablet to change, but it remained as %s' %
            new_master_name)
        logging.info(
            'restartalloc on %s/%s resulted in new master: %s, task: %d',
            keyspace, shard_name, new_master_name, new_master_task_num)
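
The loop above is a poll-until-predicate pattern; here is a minimal standalone sketch of it, assuming nothing beyond the standard library (wait_until is my name, not the repo's).

import time

def wait_until(predicate, timeout, poll_interval=1):
  # Polls predicate() until it returns truthy or timeout seconds elapse;
  # returns the last result so callers can assert on it afterwards,
  # mirroring the loop-then-assert structure above.
  start_time = time.time()
  result = predicate()
  while not result and time.time() - start_time < timeout:
    time.sleep(poll_interval)
    result = predicate()
  return result

# e.g.:
# wait_until(
#     lambda: env.get_current_master_name(keyspace, shard_name)
#     != original_master_name,
#     reparent_timeout_threshold)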
Example #3
 def internal_reparent(self,
                       keyspace,
                       new_cell,
                       shard,
                       num_shards,
                       new_task_num,
                       emergency=False):
     shard_name = utils.get_shard_name(shard, num_shards)
     cell_number = self.cells.index(new_cell) + 1
     new_master = '%s-%02d00000%d%02d' % (new_cell, cell_number, shard + 1,
                                          new_task_num)
     reparent_command = ('EmergencyReparentShard'
                         if emergency else 'PlannedReparentShard')
     self.vtctl_helper.execute_vtctl_command(
         [reparent_command,
          '%s/%s' % (keyspace, shard_name), new_master])
     self.vtctl_helper.execute_vtctl_command(
         ['RebuildKeyspaceGraph', keyspace])
     return 0, 'No output'
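
To make the alias arithmetic concrete, a worked example of the format string above with illustrative values:

# cell='test', cell_number=1, shard=0, new_task_num=3:
alias = '%s-%02d00000%d%02d' % ('test', 1, 0 + 1, 3)
assert alias == 'test-0100000103'
# i.e. a two-digit cell number, the literal '00000', a one-digit shard
# ordinal, and a two-digit task number packed into the tablet alias.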
Example #4
  def implicit_reparent(self, keyspace, shard, num_shards):
    """Performs an implicit reparent.

    This function will call borg restart on the current master task and
    verify that decider selected a new task to be the master.

    Args:
      keyspace: Name of the keyspace to reparent (string)
      shard: Numeric ID of the shard to reparent (zero based int)
      num_shards: Total number of shards (int)
    """

    shard_name = utils.get_shard_name(shard, num_shards)

    original_master_name = (
        self.env.get_current_master_name(keyspace, shard_name))
    original_master_cell = self.env.get_tablet_cell(original_master_name)
    master_task_num = self.env.get_tablet_task_number(original_master_name)

    logging.info('Restarting %s/%s, current master: %s, task: %d',
                 keyspace, shard_name, original_master_name, master_task_num)
    ret_val = self.env.restart_mysql_task(
        original_master_cell, keyspace, shard, master_task_num, 'replica',
        'mysql-alloc', True)

    self.assertEqual(ret_val, 0,
                     msg='restartalloc failed (returned %d)' % ret_val)

    start_time = time.time()
    while time.time() - start_time < self.reparent_timeout_threshold:
      new_master_name = self.env.get_current_master_name(keyspace, shard_name)
      new_master_task_num = self.env.get_tablet_task_number(new_master_name)
      if new_master_name != original_master_name:
        break
      time.sleep(1)
    self.assertNotEqual(
        new_master_name, original_master_name,
        msg='Expected master tablet to change, but it remained as %s' %
        new_master_name)
    logging.info('restartalloc on %s/%s resulted in new master: %s, task: %d',
                 keyspace, shard_name, new_master_name, new_master_task_num)
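
A sketch of how such a method might be driven from a test case; the class name, keyspace, and shard count are assumptions, not taken from this listing.

import unittest

class ReparentTest(unittest.TestCase):  # hypothetical harness
  def test_implicit_reparent_all_shards(self):
    # self.env and the shard count would come from the suite's setUp in
    # the real framework; shown here only to illustrate the call shape.
    num_shards = 2
    for shard in xrange(num_shards):
      self.implicit_reparent('test_keyspace', shard, num_shards)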
Example #5
  def explicit_reparent(self, keyspace, num_shards, external=False,
                        cross_cell=False):
    """Performs an explicit reparent.

    This function will call decider to explicitly select a new master and
    verify that the topology is updated.

    Args:
      keyspace: Name of the keyspace to reparent (string)
      num_shards: Total number of shards (int)
      external: Whether the reparent should be external or through vtctl (bool)
      cross_cell: Whether to reparent to a different cell (bool)

    Returns:
      How long we waited for the serving graph to be updated.
      The time begins just before calling Decider.
      This is a list of floats, one for each shard.
      For cross-cell reparents, it returns [].
    """
    original_masters = []
    next_masters = []
    shard_names = []
    durations = []

    for shard in xrange(num_shards):
      shard_name = utils.get_shard_name(shard, num_shards)
      shard_names.append(shard_name)

      original_master_name = self.env.get_current_master_name(
          keyspace, shard_name)
      original_master = {
          'cell': self.env.get_tablet_cell(original_master_name),
          'task': self.env.get_tablet_task_number(original_master_name),
          }
      original_masters.append(original_master)

      next_master_cell, next_master_task = self.env.get_next_master(
          keyspace, shard_name, cross_cell)
      next_master = {
          'cell': next_master_cell,
          'task': next_master_task,
      }
      next_masters.append(next_master)

      self.env.wait_for_good_failover_status(keyspace, shard_name)

      # Call Reparent in a separate thread.
      def reparent_shard(shard, shard_name, original_master, next_master):
        logging.info('Reparenting %s/%s from %s to %s', keyspace, shard_name,
                     original_master, next_master)
        reparent_fn = (
            self.env.external_reparent if external else
            self.env.internal_reparent)

        return_code, return_output = reparent_fn(
            keyspace, next_master['cell'], shard, num_shards,
            new_task_num=next_master['task'])
        logging.info('Reparent returned %d for %s/%s: %s',
                     return_code, keyspace, shard_name, return_output)

      thread = threading.Thread(target=reparent_shard,
                                args=[shard, shard_name, original_master,
                                      next_master])
      start_time = time.time()
      thread.start()

      if not cross_cell:
        # Wait for the shard to be updated.
        # This doesn't work for cross-cell, because mapping a task
        # number to a tablet UID is more trouble than it's worth.
        uid = (self.env.get_tablet_uid(original_master_name)
               - original_master['task'] + next_master['task'])
        while True:
          if time.time() - start_time > self.reparent_timeout_threshold:
            self.fail('Timed out waiting for serving graph update on %s/%s' % (
                keyspace, shard_name))
          try:
            shard_info = json.loads(self.env.vtctl_helper.execute_vtctl_command(
                ['GetShard', '%s/%s' % (keyspace, shard_name)]))
            if int(shard_info['master_alias']['uid']) == uid:
              duration = time.time() - start_time
              durations.append(duration)
              logging.info('Shard record updated for %s/%s after %f seconds',
                           keyspace, shard_name, duration)
              break
          except (IndexError, KeyError, vtctl_helper.VtctlClientError):
            pass

      thread.join()

    for shard_name, next_master in zip(shard_names, next_masters):
      start_time = time.time()
      while True:
        if time.time() - start_time > self.reparent_timeout_threshold:
          self.fail('%s/%s master was not updated to %s within %d seconds' % (
              keyspace, shard_name, next_master,
              self.reparent_timeout_threshold))
        if self.verify_new_master(
            keyspace, shard_name, next_master['cell'], next_master['task']):
          logging.info('%s/%s\'s new master is %s', keyspace, shard_name,
                       next_master)
          break
        time.sleep(1)

    return durations
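
The inner wait loop keys off the master_alias field of the GetShard record; isolated, that check looks like the following sketch (the helper name is mine, not the repo's).

import json

def current_master_uid(vtctl_helper, keyspace, shard_name):
  # Returns the master tablet UID recorded in the shard record, or None
  # if the record has no master yet or the output fails to parse.
  try:
    shard_info = json.loads(vtctl_helper.execute_vtctl_command(
        ['GetShard', '%s/%s' % (keyspace, shard_name)]))
    return int(shard_info['master_alias']['uid'])
  except (KeyError, TypeError, ValueError):
    return None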
Example #6
    def use_named(self, instance_name):
        # Check to make sure kubectl exists
        try:
            subprocess.check_output(['kubectl'])
        except OSError:
            raise base_environment.VitessEnvironmentError(
                'kubectl not found, please install by visiting kubernetes.io or '
                'running gcloud components update kubectl if using compute engine.'
            )

        get_address_template = (
            '{{if ge (len .status.loadBalancer) 1}}'
            '{{index (index .status.loadBalancer.ingress 0) "ip"}}'
            '{{end}}')

        get_address_params = [
            'kubectl', 'get', '-o', 'template', '--template',
            get_address_template, 'service', '--namespace', instance_name
        ]

        start_time = time.time()
        vtctld_addr = ''
        while time.time() - start_time < 60 and not vtctld_addr:
            vtctld_addr = subprocess.check_output(get_address_params +
                                                  ['vtctld'],
                                                  stderr=subprocess.STDOUT)
        self.vtctl_addr = '%s:15999' % vtctld_addr

        self.vtctl_helper = vtctl_helper.VtctlHelper('grpc', self.vtctl_addr)
        self.cluster_name = instance_name

        keyspaces = self.vtctl_helper.execute_vtctl_command(['GetKeyspaces'])
        self.mobs = filter(None, keyspaces.split('\n'))
        self.keyspaces = self.mobs

        if not self.keyspaces:
            raise base_environment.VitessEnvironmentError(
                'Invalid environment, no keyspaces found')

        self.num_shards = []

        for keyspace in self.keyspaces:
            keyspace_info = json.loads(
                self.vtctl_helper.execute_vtctl_command(
                    ['GetKeyspace', keyspace]))
            if not keyspace_info:
                self.num_shards.append(1)
            else:
                self.num_shards.append(keyspace_info['split_shard_count'])

        # This assumes that all keyspaces use the same set of cells
        self.cells = json.loads(
            self.vtctl_helper.execute_vtctl_command([
                'GetShard',
                '%s/%s' % (self.keyspaces[0],
                           utils.get_shard_name(0, self.num_shards[0]))
            ]))['cells']

        self.primary_cells = self.cells
        self.replica_instances = []
        self.rdonly_instances = []

        # This assumes that all cells are equivalent for k8s environments.
        all_tablets_in_a_cell = self.vtctl_helper.execute_vtctl_command(
            ['ListAllTablets', self.cells[0]])
        all_tablets_in_a_cell = [
            x.split(' ')
            for x in filter(None, all_tablets_in_a_cell.split('\n'))
        ]

        for index, keyspace in enumerate(self.keyspaces):
            keyspace_tablets_in_cell = [
                tablet for tablet in all_tablets_in_a_cell
                if tablet[1] == keyspace
            ]
            replica_tablets_in_cell = [
                tablet for tablet in keyspace_tablets_in_cell
                if tablet[3] == 'master' or tablet[3] == 'replica'
            ]
            replica_instances = len(
                replica_tablets_in_cell) / self.num_shards[index]
            self.replica_instances.append(replica_instances)
            self.rdonly_instances.append((len(keyspace_tablets_in_cell) /
                                          self.num_shards[index]) -
                                         replica_instances)

        # Converts keyspace name and alias to number of instances
        self.keyspace_alias_to_num_instances_dict = {}
        for index, keyspace in enumerate(self.keyspaces):
            self.keyspace_alias_to_num_instances_dict[keyspace] = {
                'replica': int(self.replica_instances[index]),
                'rdonly': int(self.rdonly_instances[index])
            }

        start_time = time.time()
        self.vtgate_addrs = {}
        self.vtgate_conns = {}
        for cell in self.cells:
            vtgate_addr = ''
            while time.time() - start_time < 60 and not vtgate_addr:
                vtgate_addr = subprocess.check_output(get_address_params +
                                                      ['vtgate-%s' % cell],
                                                      stderr=subprocess.STDOUT)
            self.vtgate_addrs[cell] = '%s:15001' % vtgate_addr
            self.vtgate_conns[cell] = vtgate_client.connect(
                protocols_flavor.protocols_flavor().vtgate_python_protocol(),
                self.vtgate_addrs[cell], 60)
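
The load-balancer lookup can be factored into a small helper; a sketch under the same assumptions as above (kubectl on the PATH, service published through an ingress IP). The function name is illustrative.

import subprocess
import time

_ADDRESS_TEMPLATE = (
    '{{if ge (len .status.loadBalancer) 1}}'
    '{{index (index .status.loadBalancer.ingress 0) "ip"}}'
    '{{end}}')

def get_service_ip(namespace, service, timeout=60):
  # Polls kubectl for a service's load-balancer IP, returning '' if none
  # appears within timeout seconds.
  params = ['kubectl', 'get', '-o', 'template', '--template',
            _ADDRESS_TEMPLATE, 'service', '--namespace', namespace, service]
  start_time = time.time()
  addr = ''
  while time.time() - start_time < timeout and not addr:
    addr = subprocess.check_output(params, stderr=subprocess.STDOUT).strip()
  return addr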
Example #7
    def explicit_reparent(self,
                          keyspace,
                          num_shards,
                          external=False,
                          cross_cell=False):
        """Performs an explicit reparent.

    This function will call decider to explicity select a new master and verify
    that the topology is updated.

    Args:
      keyspace: Name of the keyspace to reparent (string)
      num_shards: Total number of shards (int)
      external: Whether the reparent should be external or through vtctl (bool)
      cross_cell: Whether to reparent to a different cell (bool)

    Returns:
      How long we waited for the serving graph to be updated.
      The time begins just before calling Decider.
      This is a list of floats, one for each shard.
      For cross-cell reparents, it returns [].
    """
        original_masters = []
        next_masters = []
        shard_names = []
        durations = []

        for shard in xrange(num_shards):
            shard_name = utils.get_shard_name(shard, num_shards)
            shard_names.append(shard_name)

            original_master_name = self.env.get_current_master_name(
                keyspace, shard_name)
            original_master = {
                'cell': self.env.get_tablet_cell(original_master_name),
                'task': self.env.get_tablet_task_number(original_master_name),
            }
            original_masters.append(original_master)

            next_master_cell, next_master_task = self.env.get_next_master(
                keyspace, shard_name, cross_cell)
            next_master = {
                'cell': next_master_cell,
                'task': next_master_task,
            }
            next_masters.append(next_master)

            self.env.wait_for_good_failover_status(keyspace, shard_name)

            # Call Reparent in a separate thread.
            def reparent_shard(shard, shard_name, original_master,
                               next_master):
                logging.info('Reparenting %s/%s from %s to %s', keyspace,
                             shard_name, original_master, next_master)
                reparent_fn = (self.env.external_reparent
                               if external else self.env.internal_reparent)

                return_code, return_output = reparent_fn(
                    keyspace,
                    next_master['cell'],
                    shard,
                    num_shards,
                    new_task_num=next_master['task'])
                logging.info('Reparent returned %d for %s/%s: %s', return_code,
                             keyspace, shard_name, return_output)

            thread = threading.Thread(
                target=reparent_shard,
                args=[shard, shard_name, original_master, next_master])
            start_time = time.time()
            thread.start()

            if not cross_cell:
                # Wait for the serving graph to be updated.
                # This doesn't work for cross-cell, because mapping a task
                # number to a tablet UID is more trouble than it's worth.
                uid = (self.env.get_tablet_uid(original_master_name) -
                       original_master['task'] + next_master['task'])
                while True:
                    if (time.time() - start_time >
                            self.reparent_timeout_threshold):
                        self.fail(
                            'Timed out waiting for serving graph update on %s/%s'
                            % (keyspace, shard_name))
                    try:
                        endpoints = json.loads(
                            self.env.vtctl_helper.execute_vtctl_command([
                                'GetEndPoints', next_master['cell'],
                                '%s/%s' % (keyspace, shard_name), 'master'
                            ]))
                        if int(endpoints['entries'][0]['uid']) == uid:
                            duration = time.time() - start_time
                            durations.append(duration)
                            logging.info(
                                'Serving graph updated for %s/%s after %f seconds',
                                keyspace, shard_name, duration)
                            break
                    except (IndexError, KeyError,
                            vtctl_helper.VtctlClientError):
                        pass

            thread.join()

        for shard_name, next_master in zip(shard_names, next_masters):
            start_time = time.time()
            while True:
                if time.time() - start_time > self.reparent_timeout_threshold:
                    self.fail(
                        '%s/%s master was not updated to %s within %d seconds'
                        % (keyspace, shard_name, next_master,
                           self.reparent_timeout_threshold))
                if self.verify_new_master(keyspace, shard_name,
                                          next_master['cell'],
                                          next_master['task']):
                    logging.info('%s/%s\'s new master is %s', keyspace,
                                 shard_name, next_master)
                    break
                time.sleep(1)

        return durations
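
Unlike Example #5, this variant polls the serving graph through GetEndPoints rather than the shard record; in isolation that check is roughly the following sketch (helper name is an assumption).

import json

def serving_master_uid(vtctl_helper, cell, keyspace, shard_name):
  # Returns the UID of the master endpoint published in a cell's serving
  # graph, or None while no master endpoint exists yet.
  try:
    endpoints = json.loads(vtctl_helper.execute_vtctl_command(
        ['GetEndPoints', cell, '%s/%s' % (keyspace, shard_name), 'master']))
    return int(endpoints['entries'][0]['uid'])
  except (IndexError, KeyError, ValueError):
    return None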
Example #8
  def use_named(self, instance_name):
    # Check to make sure kubectl exists
    try:
      subprocess.check_output(['kubectl'])
    except OSError:
      raise base_environment.VitessEnvironmentError(
          'kubectl not found, please install by visiting kubernetes.io or '
          'running gcloud components update kubectl if using compute engine.')

    get_address_template = (
        '{{if ge (len .status.loadBalancer) 1}}'
        '{{index (index .status.loadBalancer.ingress 0) "ip"}}'
        '{{end}}')

    get_address_params = ['kubectl', 'get', '-o', 'template', '--template',
                          get_address_template, 'service', '--namespace',
                          instance_name]

    start_time = time.time()
    vtctld_addr = ''
    while time.time() - start_time < 60 and not vtctld_addr:
      vtctld_addr = subprocess.check_output(
          get_address_params + ['vtctld'], stderr=subprocess.STDOUT)
    self.vtctl_addr = '%s:15999' % vtctld_addr

    self.vtctl_helper = vtctl_helper.VtctlHelper('grpc', self.vtctl_addr)
    self.cluster_name = instance_name

    keyspaces = self.vtctl_helper.execute_vtctl_command(['GetKeyspaces'])
    self.mobs = filter(None, keyspaces.split('\n'))
    self.keyspaces = self.mobs

    if not self.keyspaces:
      raise base_environment.VitessEnvironmentError(
          'Invalid environment, no keyspaces found')

    self.num_shards = []

    for keyspace in self.keyspaces:
      keyspace_info = json.loads(self.vtctl_helper.execute_vtctl_command(
          ['GetKeyspace', keyspace]))
      if not keyspace_info:
        self.num_shards.append(1)
      else:
        self.num_shards.append(keyspace_info['split_shard_count'])

    # This assumes that all keyspaces use the same set of cells
    self.cells = json.loads(self.vtctl_helper.execute_vtctl_command(
        ['GetShard', '%s/%s' % (
            self.keyspaces[0], utils.get_shard_name(0, self.num_shards[0]))]
        ))['cells']

    self.primary_cells = self.cells
    self.replica_instances = []
    self.rdonly_instances = []

    # This assumes that all cells are equivalent for k8s environments.
    all_tablets_in_a_cell = self.vtctl_helper.execute_vtctl_command(
        ['ListAllTablets', self.cells[0]])
    all_tablets_in_a_cell = [x.split(' ') for x in
                             filter(None, all_tablets_in_a_cell.split('\n'))]

    for index, keyspace in enumerate(self.keyspaces):
      keyspace_tablets_in_cell = [
          tablet for tablet in all_tablets_in_a_cell if tablet[1] == keyspace]
      replica_tablets_in_cell = [
          tablet for tablet in keyspace_tablets_in_cell
          if tablet[3] == 'master' or tablet[3] == 'replica']
      replica_instances = len(replica_tablets_in_cell) / self.num_shards[index]
      self.replica_instances.append(replica_instances)
      self.rdonly_instances.append(
          (len(keyspace_tablets_in_cell) / self.num_shards[index]) -
          replica_instances)

    # Converts keyspace name and alias to number of instances
    self.keyspace_alias_to_num_instances_dict = {}
    for index, keyspace in enumerate(self.keyspaces):
      self.keyspace_alias_to_num_instances_dict[keyspace] = {
          'replica': int(self.replica_instances[index]),
          'rdonly': int(self.rdonly_instances[index])
      }

    start_time = time.time()
    self.vtgate_addrs = {}
    self.vtgate_conns = {}
    for cell in self.cells:
      vtgate_addr = ''
      while time.time() - start_time < 60 and not vtgate_addr:
        vtgate_addr = subprocess.check_output(
            get_address_params + ['vtgate-%s' % cell], stderr=subprocess.STDOUT)
      self.vtgate_addrs[cell] = '%s:15001' % vtgate_addr
      self.vtgate_conns[cell] = vtgate_client.connect(
          protocols_flavor.protocols_flavor().vtgate_python_protocol(),
          self.vtgate_addrs[cell], 60)
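
The replica/rdonly accounting at the end reduces to row counting on ListAllTablets output; a worked sketch with hypothetical rows (alias, keyspace, shard, type, address), using the same Python 2 integer division as the code above:

rows = [
    'test-0100000100 test_keyspace 0 master host:port'.split(' '),
    'test-0100000101 test_keyspace 0 replica host:port'.split(' '),
    'test-0100000102 test_keyspace 0 rdonly host:port'.split(' '),
]
num_shards = 1
replicas = len([r for r in rows if r[3] in ('master', 'replica')]) / num_shards
rdonly = len(rows) / num_shards - replicas
assert (replicas, rdonly) == (2, 1)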