Example #1
0
def write_snapshot(node):
    """
    Snapshot the database with innobackupex, upload the resulting tar
    file through node.manta, and then record the backup (id, time and
    current binlog file name) in Consul.
    """
    tar_path = '/tmp/backup.tar'
    started = datetime.utcnow()
    # the id is produced with strftime rather than .isoformat() because
    # of URL encoding
    backup_id = started.strftime('{}'.format(BACKUP_NAME))
    backup_time = started.isoformat()

    with open(tar_path, 'w') as tar_file:
        user_arg = '--user={}'.format(node.mysql.repl_user)
        password_arg = '--password={}'.format(node.mysql.repl_password)
        command = [
            '/usr/bin/innobackupex',
            user_arg,
            password_arg,
            '--no-timestamp',
            # '--compress' is currently not passed
            '--stream=tar',
            '/tmp/backup',
        ]
        # innobackupex streams the tar archive to stdout, which is
        # redirected into the file opened above
        subprocess.check_call(command, stdout=tar_file)
    log.info('snapshot completed, uploading to object store')
    node.manta.put_backup(backup_id, tar_path)
    log.info('snapshot uploaded to %s/%s', node.manta.bucket, backup_id)

    # Store the binlog file name in Consul so that we can later tell
    # whether the binlog has rotated since this backup. A missing 'File'
    # key is not handled here -- it bubbles up because something's broken.
    master_status = node.mysql.query('show master status')
    binlog_file = master_status[0]['File']
    node.consul.record_backup(backup_id, backup_time, binlog_file)
Example #2
0
def write_snapshot(node):
    """
    Snapshot the database with innobackupex, upload the resulting tar
    file through node.storage, and then record the backup (id, time and
    current binlog file name) in Consul.
    """
    tar_path = '/tmp/backup.tar'
    started = datetime.utcnow()
    # the id is produced with strftime rather than .isoformat() because
    # of URL encoding
    backup_id = started.strftime('{}'.format(BACKUP_NAME))
    backup_time = started.isoformat()

    with open(tar_path, 'w') as tar_file:
        user_arg = '--user={}'.format(node.mysql.repl_user)
        password_arg = '--password={}'.format(node.mysql.repl_password)
        command = [
            '/usr/bin/innobackupex',
            user_arg,
            password_arg,
            '--no-timestamp',
            # '--compress' is currently not passed
            '--stream=tar',
            '/tmp/backup',
        ]
        # innobackupex streams the tar archive to stdout, which is
        # redirected into the file opened above
        subprocess.check_call(command, stdout=tar_file)
    log.info('snapshot completed, uploading to object store')
    uploaded_to = node.storage.put_backup(backup_id, tar_path)
    log.info('snapshot uploaded to %s', uploaded_to)

    # Store the binlog file name in Consul so that we can later tell
    # whether the binlog has rotated since this backup. A missing 'File'
    # key is not handled here -- it bubbles up because something's broken.
    master_status = node.mysql.query('show master status')
    binlog_file = master_status[0]['File']
    node.consul.record_backup(backup_id, backup_time, binlog_file)
Example #3
0
 def reload(self):
     """
     Ask ContainerPilot to reload its configuration by running
     `containerpilot -reload`. A non-zero exit status is logged and
     otherwise ignored.
     """
     log.info('Reloading ContainerPilot configuration.')
     reload_cmd = ['containerpilot', '-reload']
     try:
         subprocess.check_output(reload_cmd)
     except subprocess.CalledProcessError:
         # best effort: report the failure but do not propagate it
         log.info("call to 'containerpilot -reload' failed")
Example #4
0
def run_as_replica(node):
    """
    Mark this node as a replica and configure GTID-based replication
    against the primary reported by Consul. Once replication is set up
    the replica will automatically try to catch up with the primary's
    last transactions.

    Nothing is caught here: UnknownPrimary or mysqlconn.Errors are
    allowed to bubble up to the caller.
    """
    log.info('Setting up replication.')
    node.cp.state = REPLICA
    # get_primary returns a pair; only the second item (the primary's
    # address) is needed to set up replication
    primary = node.consul.get_primary(timeout=30)
    _unused, primary_addr = primary
    node.mysql.setup_replication(primary_addr)
Example #5
0
def run_as_replica(node):
    """
    Mark this node as a replica and configure GTID-based replication
    against the primary reported by Consul. Once replication is set up
    the replica will automatically try to catch up with the primary's
    last transactions.

    Nothing is caught here: UnknownPrimary or mysqlconn.Errors are
    allowed to bubble up to the caller.
    """
    log.info('Setting up replication.')
    node.cp.state = REPLICA
    # get_primary returns a pair; only the second item (the primary's
    # address) is needed to set up replication
    primary = node.consul.get_primary(timeout=30)
    _unused, primary_addr = primary
    node.mysql.setup_replication(primary_addr)
Example #6
0
def on_change(node):
    """
    The top-level ContainerPilot onChange handler.

    Flow, in order:
      1. If this node already reports itself as primary, refresh the
         ContainerPilot config (and the PRIMARY_KEY entry in Consul)
         when `node.cp.update()` reports a change, then return.
      2. If Consul returns a primary within the 1-unit timeout, return;
         no failover is needed.
      3. Otherwise try to take the failover lock. The lock holder runs
         `node.mysql.failover` against the addresses of the passing
         REPLICA service instances; everyone else waits on the lock.
      4. Reset `node.cp.state` to UNASSIGNED and re-check the node's
         role. If the state is still UNASSIGNED afterwards, log an
         error and exit the process with status 1.

    Any exception raised while running the failover is re-raised after
    the failover lock has been released.
    """

    # first check if this node has already been set primary by a completed
    # call to failover and update the ContainerPilot config as needed.
    if node.is_primary():
        log.debug('[on_change] this node is primary, no failover required.')
        if node.cp.update():
            # we're ignoring the lock here intentionally
            node.consul.put(PRIMARY_KEY, node.name)
            node.cp.reload()
        return

    # check if another node has been set primary already and is reporting
    # as healthy, in which case there's no failover required. Note that
    # we can't simply check if we're a replica via .is_replica() b/c that
    # trusts mysqld's view of the world.
    try:
        node.consul.get_primary(timeout=1)
        log.debug('[on_change] primary is already healthy, no failover required')
        return
    except (UnknownPrimary, WaitTimeoutError) as ex:
        # no primary known to Consul: fall through to the failover path
        log.debug('[on_change] no primary from consul: %s', ex)

    if node.consul.lock_failover(node.name):
        # NOTE(review): the lock is only released explicitly on the
        # failure path below; presumably it expires or is released
        # elsewhere after a successful failover -- confirm.
        try:
            # health.service(...) returns a sequence whose item [1] is the
            # list of service instances; we only need their addresses
            nodes = node.consul.client.health.service(REPLICA, passing=True)[1]
            ips = [instance['Service']['Address'] for instance in nodes]
            log.info('[on_change] Executing failover with candidates: %s', ips)
            node.mysql.failover(ips)
        except Exception:
            # On failure we bubble-up the exception and fail the onChange.
            # Either another instance that didn't overlap in time will
            # complete failover or we'll be left w/o a primary and require
            # manual intervention via `mysqlrpladmin failover`
            node.consul.unlock_failover()
            raise
    else:
        log.info('[on_change] Failover in progress on another node, '
                 'waiting to complete.')
        node.consul.wait_for_failover_lock()

    # need to determine replication status at this point, so make
    # sure we refresh .state from mysqld/Consul
    # NOTE(review): the UNASSIGNED check at the end only makes sense if
    # is_primary()/is_replica() update node.cp.state as a side effect --
    # confirm against their implementation.
    node.cp.state = UNASSIGNED
    if node.is_primary():
        log.info('[on_change] node %s is primary after failover', node.name)
        if node.cp.update():
            # we're intentionally ignoring the advisory lock here
            ok = node.consul.put(PRIMARY_KEY, node.name)
            log.debug('[on_change] %s obtained lock: %s', node.name, ok)
            node.cp.reload()
        return
    elif node.is_replica():
        log.info('[on_change] node %s is replica after failover', node.name)

    if node.cp.state == UNASSIGNED:
        # neither role check assigned a state: fail the handler
        log.error('[on_change] this node is neither primary or replica '
                  'after failover; check replication status on cluster.')
        sys.exit(1)
Example #7
0
def on_change(node):
    """
    The top-level ContainerPilot onChange handler.

    Flow, in order:
      1. If this node already reports itself as primary, refresh the
         ContainerPilot config (and the PRIMARY_KEY entry in Consul)
         when `node.cp.update()` reports a change, then return.
      2. If Consul returns a primary within the 1-unit timeout, return;
         no failover is needed.
      3. Otherwise try to take the failover lock. The lock holder runs
         `node.mysql.failover` against the addresses of the passing
         REPLICA service instances; everyone else waits on the lock.
      4. Reset `node.cp.state` to UNASSIGNED and re-check the node's
         role. If the state is still UNASSIGNED afterwards, log an
         error and exit the process with status 1.

    Any exception raised while running the failover is re-raised after
    the failover lock has been released.
    """

    # first check if this node has already been set primary by a completed
    # call to failover and update the ContainerPilot config as needed.
    if node.is_primary():
        log.debug('[on_change] this node is primary, no failover required.')
        if node.cp.update():
            # we're ignoring the lock here intentionally
            node.consul.put(PRIMARY_KEY, node.name)
            node.cp.reload()
        return

    # check if another node has been set primary already and is reporting
    # as healthy, in which case there's no failover required. Note that
    # we can't simply check if we're a replica via .is_replica() b/c that
    # trusts mysqld's view of the world.
    try:
        node.consul.get_primary(timeout=1)
        log.debug('[on_change] primary is already healthy, no failover required')
        return
    except (UnknownPrimary, WaitTimeoutError) as ex:
        # no primary known to Consul: fall through to the failover path
        log.debug('[on_change] no primary from consul: %s', ex)

    if node.consul.lock_failover(node.name):
        # NOTE(review): the lock is only released explicitly on the
        # failure path below; presumably it expires or is released
        # elsewhere after a successful failover -- confirm.
        try:
            # health.service(...) returns a sequence whose item [1] is the
            # list of service instances; we only need their addresses
            nodes = node.consul.client.health.service(REPLICA, passing=True)[1]
            ips = [instance['Service']['Address'] for instance in nodes]
            log.info('[on_change] Executing failover with candidates: %s', ips)
            node.mysql.failover(ips)
        except Exception:
            # On failure we bubble-up the exception and fail the onChange.
            # Either another instance that didn't overlap in time will
            # complete failover or we'll be left w/o a primary and require
            # manual intervention via `mysqlrpladmin failover`
            node.consul.unlock_failover()
            raise
    else:
        log.info('[on_change] Failover in progress on another node, '
                 'waiting to complete.')
        node.consul.wait_for_failover_lock()

    # need to determine replication status at this point, so make
    # sure we refresh .state from mysqld/Consul
    # NOTE(review): the UNASSIGNED check at the end only makes sense if
    # is_primary()/is_replica() update node.cp.state as a side effect --
    # confirm against their implementation.
    node.cp.state = UNASSIGNED
    if node.is_primary():
        log.info('[on_change] node %s is primary after failover', node.name)
        if node.cp.update():
            # we're intentionally ignoring the advisory lock here
            ok = node.consul.put(PRIMARY_KEY, node.name)
            log.debug('[on_change] %s obtained lock: %s', node.name, ok)
            node.cp.reload()
        return
    elif node.is_replica():
        log.info('[on_change] node %s is replica after failover', node.name)

    if node.cp.state == UNASSIGNED:
        # neither role check assigned a state: fail the handler
        log.error('[on_change] this node is neither primary or replica '
                  'after failover; check replication status on cluster.')
        sys.exit(1)
Example #8
0
def pre_start(node):
    """
    The top-level ContainerPilot `preStart` handler.

    MySQL must be running for most of the setup work, so this handler
    only prepares the directory structures; the first health check
    handler takes it from there.
    """
    mysql = node.mysql
    # if an external data volume was pulled in, make sure the mysql
    # user can read it before anything else touches it
    mysql.take_ownership()
    mysql.render()

    system_db_dir = os.path.join(mysql.datadir, 'mysql')
    # only initialize when the `mysql` directory is absent from datadir
    if not os.path.isdir(system_db_dir) and not mysql.initialize_db():
        log.info('Skipping database setup.')
Example #9
0
 def initialize_db(self):
     """
     Post-installation step: create the data directory and run
     `mysql_install_db` against it to install the mysql.user tables.

     Returns True when `mysql_install_db` exits successfully and False
     when it exits with a non-zero status (which is reported as the
     database having been initialized previously).
     """
     self.make_datadir()
     log.info('Initializing database...')
     install_cmd = [
         '/usr/bin/mysql_install_db',
         '--user=mysql',
         '--datadir={}'.format(self.datadir),
     ]
     try:
         subprocess.check_call(install_cmd)
     except subprocess.CalledProcessError:
         log.warn('Database was previously initialized.')
         return False
     log.info('Database initialized.')
     return True
 def initialize_db(self):
     """
     Post-installation step: create the data directory and run
     `mysql_install_db` against it to install the mysql.user tables.

     Returns True when `mysql_install_db` exits successfully and False
     when it exits with a non-zero status (which is reported as the
     database having been initialized previously).
     """
     self.make_datadir()
     log.info('Initializing database...')
     install_cmd = [
         '/usr/bin/mysql_install_db',
         '--user=mysql',
         '--datadir={}'.format(self.datadir),
     ]
     try:
         subprocess.check_call(install_cmd)
     except subprocess.CalledProcessError:
         log.warn('Database was previously initialized.')
         return False
     log.info('Database initialized.')
     return True
Example #11
0
def pre_start(node):
    """
    The top-level ContainerPilot `preStart` handler.

    MySQL must be running for most of the setup work, so this handler
    only prepares the directory structures; the first health check
    handler takes it from there. When the data directory has no `mysql`
    subdirectory yet, it is populated either from the snapshot that
    Consul reports or, if there is none, by initializing a fresh database.
    """
    mysql = node.mysql
    # if an external data volume was pulled in, make sure the mysql
    # user can read it before anything else touches it
    mysql.take_ownership()
    mysql.render()

    if os.path.isdir(os.path.join(mysql.datadir, 'mysql')):
        # data directory is already populated; nothing more to do here
        return

    snapshot = node.consul.has_snapshot()
    if snapshot:
        # fetch the backup first, then restore from it
        node.manta.get_backup(snapshot)
        mysql.restore_from_snapshot(snapshot)
    elif not mysql.initialize_db():
        log.info('Skipping database setup.')
Example #12
0
    def setup_root_user(self, conn):
        """
        Create the root user and optionally give it a random root password.

        When `self.mysql_random_root_password` is truthy, a new
        20-character password is generated, stored on
        `self.mysql_root_password` and logged. The statements that clear
        the `mysql`.`user` table (except `mysql.sys`), create `root`@`%`
        with that password, grant it all privileges and drop the `test`
        database are then queued with `self.add` and run through
        `self.execute_many` on `conn`.
        """
        if self.mysql_random_root_password:
            # we could use --random-passwords in our call to `mysql_install_db`
            # instead here but we want to have the root password available
            # until we're done with this setup.
            import random  # local import: only needed on this branch

            chars = string.ascii_letters + string.digits + '!@#$%&^*()'
            # SystemRandom is backed by os.urandom and works on both
            # Python 2 and 3 (bytes has no .encode('hex') on Python 3);
            # choice() also avoids the modulo bias of mapping a random
            # byte onto the alphabet with `% len(chars)`.
            rng = random.SystemRandom()
            passwd = ''.join(rng.choice(chars) for _ in range(20))
            self.mysql_root_password = passwd
            # NOTE(review): the generated password is written to the log in
            # clear text; presumably this is how operators retrieve it --
            # confirm that log access is restricted accordingly.
            log.info('Generated root password: %s', self.mysql_root_password)

        # keep these statements out of the binary log for this session
        self.add('SET @@SESSION.SQL_LOG_BIN=0;')
        self.add('DELETE FROM `mysql`.`user` where user != \'mysql.sys\';')
        # the password is passed as a parameter rather than interpolated
        self.add('CREATE USER `root`@`%` IDENTIFIED BY %s ;',
                 (self.mysql_root_password,))
        self.add('GRANT ALL ON *.* TO `root`@`%` WITH GRANT OPTION ;')
        self.add('DROP DATABASE IF EXISTS test ;')
        self.add('FLUSH PRIVILEGES ;')
        self.execute_many(conn=conn)
    def setup_root_user(self, conn):
        """
        Create the root user and optionally give it a random root password.

        When `self.mysql_random_root_password` is truthy, a new
        20-character password is generated, stored on
        `self.mysql_root_password` and logged. The statements that clear
        the `mysql`.`user` table (except `mysql.sys`), create `root`@`%`
        with that password, grant it all privileges and drop the `test`
        database are then queued with `self.add` and run through
        `self.execute_many` on `conn`.
        """
        if self.mysql_random_root_password:
            # we could use --random-passwords in our call to `mysql_install_db`
            # instead here but we want to have the root password available
            # until we're done with this setup.
            import random  # local import: only needed on this branch

            chars = string.ascii_letters + string.digits + '!@#$%&^*()'
            # SystemRandom is backed by os.urandom and works on both
            # Python 2 and 3 (bytes has no .encode('hex') on Python 3);
            # choice() also avoids the modulo bias of mapping a random
            # byte onto the alphabet with `% len(chars)`.
            rng = random.SystemRandom()
            passwd = ''.join(rng.choice(chars) for _ in range(20))
            self.mysql_root_password = passwd
            # NOTE(review): the generated password is written to the log in
            # clear text; presumably this is how operators retrieve it --
            # confirm that log access is restricted accordingly.
            log.info('Generated root password: %s', self.mysql_root_password)

        # keep these statements out of the binary log for this session
        self.add('SET @@SESSION.SQL_LOG_BIN=0;')
        self.add('DELETE FROM `mysql`.`user` where user != \'mysql.sys\';')
        # the password is passed as a parameter rather than interpolated
        self.add('CREATE USER `root`@`%` IDENTIFIED BY %s ;',
                 (self.mysql_root_password, ))
        self.add('GRANT ALL ON *.* TO `root`@`%` WITH GRANT OPTION ;')
        self.add('DROP DATABASE IF EXISTS test ;')
        self.add('FLUSH PRIVILEGES ;')
        self.execute_many(conn=conn)
Example #14
0
 def reload(self):
     """
     Ask ContainerPilot to re-read its configuration by sending SIGHUP
     to process id 1.
     """
     log.info('Reloading ContainerPilot configuration.')
     # presumably ContainerPilot runs as PID 1 in the container -- confirm
     target_pid = 1
     os.kill(target_pid, signal.SIGHUP)
Example #15
0
 def _render(self):
     """
     Serialize self.config as JSON and overwrite the file at self.path
     with the result.
     """
     serialized = json.dumps(self.config)
     with open(self.path, 'w') as config_file:
         # the full serialized config is included in the log line
         log.info('rewriting ContainerPilot config: %s', serialized)
         config_file.write(serialized)
 def reload(self):
     """
     Ask ContainerPilot to re-read its configuration by sending SIGHUP
     to process id 1.
     """
     log.info('Reloading ContainerPilot configuration.')
     # presumably ContainerPilot runs as PID 1 in the container -- confirm
     target_pid = 1
     os.kill(target_pid, signal.SIGHUP)
 def _render(self):
     """
     Serialize self.config as JSON and overwrite the file at self.path
     with the result.
     """
     serialized = json.dumps(self.config)
     with open(self.path, 'w') as config_file:
         # the full serialized config is included in the log line
         log.info('rewriting ContainerPilot config: %s', serialized)
         config_file.write(serialized)