def main():
    """ Parse argument as command and execute that command with
    parameters containing the state of MySQL, ContainerPilot, etc.

    Default behavior (no argument) is to run `pre_start` DB
    initialization. Exits with status 1 on an invalid command name.
    """
    if len(sys.argv) == 1:
        # No command given: run pre_start. The local Consul agent may not
        # be up yet, so point at the CONSUL env var (default 'consul').
        consul = Consul(envs={'CONSUL': os.environ.get('CONSUL', 'consul')})
        cmd = pre_start
    else:
        consul = Consul()
        try:
            cmd = globals()[sys.argv[1]]
        except KeyError:
            log.error('Invalid command: %s', sys.argv[1])
            sys.exit(1)
        if not callable(cmd):
            # guard against dispatching to module-level constants/classes
            log.error('Invalid command: %s', sys.argv[1])
            sys.exit(1)

    my = MySQL()

    # pick the snapshot storage backend; anything unrecognized falls
    # back to Manta, matching the original if/elif behavior
    backends = {'local': Local, 'minio': Minio, 'manta': Manta}
    snapshot_backend = os.environ.get('SNAPSHOT_BACKEND', 'manta')
    snaps = backends.get(snapshot_backend, Manta)()

    cp = ContainerPilot()
    cp.load()
    node = Node(mysql=my, consul=consul, snaps=snaps, cp=cp)
    try:
        cmd(node)
    finally:
        # always release the DB connection, even if the command raises
        my.close()
def test_parse_without_consul_agent(self):
    """ When CONSUL_AGENT is '0' or empty, the host comes from the
    configured Consul address rather than a local agent. """
    for agent_flag in ('0', ''):
        self.environ['CONSUL_AGENT'] = agent_flag
        consul = Consul(self.environ)
        self.assertEqual(consul.host, 'my.consul.example.com')
def test_binlog_stale(self):
    """ Snapshot if the binlog is stale even if its not time to do so """
    consul = Consul(envs=get_environ())
    timestamp = datetime.utcnow().isoformat()
    # Consul remembers mysql.001, but the server has rotated to mysql.002,
    # so the snapshot must be considered stale despite a fresh backup time
    stored = {
        LAST_BACKUP_KEY: '{{"id": "xxxx", "dt": "{}"}}'.format(timestamp),
        LAST_BINLOG_KEY: 'mysql.001',
    }
    consul.get = stored.__getitem__
    self.assertTrue(consul.is_snapshot_stale('mysql.002'))
def test_is_snapshot_stale_invalid(self):
    """ Raise ValueError on unparseable backup data in Consul;
    otherwise report stale only after the backup TTL has elapsed. """
    consul = Consul(envs=get_environ())
    binlog_file = 'mysql.001'

    # invalid: the stored 'dt' is not a parseable timestamp
    consul_values = {
        LAST_BACKUP_KEY: '{"id": "xxxx", "dt": "yyyyy"}',
        LAST_BINLOG_KEY: 'mysql.001',
    }
    consul.get = consul_values.__getitem__
    # assertRaises replaces the hand-rolled try/self.fail/except block
    self.assertRaises(ValueError, consul.is_snapshot_stale, binlog_file)

    # not stale: the last backup was taken just now
    now = datetime.utcnow().isoformat()
    consul_values = {
        LAST_BACKUP_KEY: '{{"id": "xxxx", "dt": "{}"}}'.format(now),
        LAST_BINLOG_KEY: 'mysql.001',
    }
    consul.get = consul_values.__getitem__
    self.assertFalse(consul.is_snapshot_stale(binlog_file))

    # stale: the last backup is 25 hours old, past the backup TTL
    then = (datetime.utcnow() - timedelta(hours=25)).isoformat()
    consul_values = {
        LAST_BACKUP_KEY: '{{"id": "xxxx", "dt": "{}"}}'.format(then),
        LAST_BINLOG_KEY: 'mysql.001',
    }
    consul.get = consul_values.__getitem__
    self.assertTrue(consul.is_snapshot_stale(binlog_file))
def test_replica_first_pass_replication_setup_fails(self):
    """ Given uninitialized node w/ failed replication setup, fail """
    self.node.mysql = MySQL(envs=get_environ())
    self.node.mysql._conn = mock.MagicMock()
    # every query returns empty, so 'show slave status' never reports
    # replication as running and the health check must exit
    self.node.mysql.query = mock.MagicMock(return_value=())
    self.node.mysql.wait_for_connection = mock.MagicMock(return_value=True)
    self.node.mysql.setup_replication = mock.MagicMock(return_value=True)

    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    primary_instance = {
        'Service': {'ID': 'node2', 'Address': '192.168.1.102'},
    }
    self.node.consul.client.health.service.return_value = [
        0, [primary_instance]]

    logging.getLogger().setLevel(logging.CRITICAL)  # noisy
    try:
        manage.health(self.node)
    except SystemExit:
        pass
    else:
        self.fail('Should have exited but did not.')

    expected_queries = [
        mock.call.query('show slave status'),
        mock.call.query('show slave hosts'),
        mock.call.query('show slave status'),
    ]
    self.node.mysql.query.assert_has_calls(expected_queries)
    self.assertEqual(self.node.consul.client.health.service.call_count, 2)
    manage.write_snapshot.assert_called_once()
    self.assertEqual(self.node.cp.state, REPLICA)
def test_replica_no_replication(self):
    """ Health check for failure mode where initial replication
    setup failed but a primary already exists in Consul.
    """
    # 0o700 (not the py2-only literal 0700) so this parses under Python 3
    os.mkdir(self.LOCK_PATH, 0o700)
    self.node.mysql = MySQL(envs=get_environ())
    self.node.mysql._conn = mock.MagicMock()
    self.node.mysql.query = mock.MagicMock(return_value=())
    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    self.node.consul.renew_session = mock.MagicMock()
    # Consul reports a healthy primary on another node
    self.node.consul.client.health.service.return_value = [
        0,
        [{
            'Service': {
                'ID': 'node2',
                'Address': '192.168.1.102'
            },
        }]
    ]
    try:
        logging.getLogger().setLevel(logging.CRITICAL)  # noisy
        manage.health(self.node)
        self.fail('Should have exited but did not.')
    except SystemExit:
        pass
    calls = [
        mock.call.query('show slave status'),
        mock.call.query('show slave hosts'),
        mock.call.query('show slave status')
    ]
    self.node.mysql.query.assert_has_calls(calls)
    # the failing health check must not have touched the session
    self.assertFalse(self.node.consul.renew_session.called)
    self.assertEqual(self.node.cp.state, REPLICA)
def test_primary_no_replicas_no_consul_state_fails(self):
    """ Health check if previously initialized but with no replicas
    and no Consul state so we'll remain marked UNASSIGNED which needs
    to be a failing health check.
    """
    # 0o700 (not the py2-only literal 0700) so this parses under Python 3
    os.mkdir(self.LOCK_PATH, 0o700)
    self.node.mysql = MySQL(envs=get_environ())
    self.node.mysql._conn = mock.MagicMock()
    self.node.mysql.query = mock.MagicMock(return_value=())
    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    self.node.consul.renew_session = mock.MagicMock()
    # no instances registered in Consul at all
    self.node.consul.client.health.service.return_value = []
    try:
        logging.getLogger().setLevel(logging.CRITICAL)  # noisy
        manage.health(self.node)
        self.fail('Should have exited but did not.')
    except SystemExit:
        pass
    calls = [
        mock.call.query('show slave status'),
        mock.call.query('show slave hosts'),
    ]
    self.node.mysql.query.assert_has_calls(calls)
    self.assertEqual(self.node.consul.client.health.service.call_count, 2)
    self.assertEqual(self.node.cp.state, UNASSIGNED)
def test_primary_no_replicas(self):
    """ Health check if previously initialized but with no replicas """
    # 0o700 (not the py2-only literal 0700) so this parses under Python 3
    os.mkdir(self.LOCK_PATH, 0o700)
    self.node.mysql = MySQL(envs=get_environ())
    self.node.mysql._conn = mock.MagicMock()
    self.node.mysql.query = mock.MagicMock(return_value=())
    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    self.node.consul.renew_session = mock.MagicMock()
    # Consul reports only this node itself as a healthy instance
    self.node.consul.client.health.service.return_value = [
        0,
        [{
            'Service': {
                'ID': 'node1',
                'Address': '192.168.1.101'
            },
        }]
    ]
    manage.health(self.node)
    calls = [
        mock.call.query('show slave status'),
        mock.call.query('show slave hosts'),
        mock.call.query('select 1')
    ]
    self.node.mysql.query.assert_has_calls(calls)
    self.node.consul.client.health.service.assert_called_once()
    # a passing primary health check renews the Consul session
    self.node.consul.renew_session.assert_called_once()
    self.assertEqual(self.node.cp.state, PRIMARY)
def test_primary_first_pass(self):
    """ Given uninitialized node w/ no other instances running, set up
    for running as the primary.
    """
    self.node.mysql.wait_for_connection.return_value = True
    self.node.mysql.get_primary.side_effect = UnknownPrimary()

    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    self.node.consul.mark_as_primary = mock.MagicMock(return_value=True)
    self.node.consul.renew_session = mock.MagicMock()
    # no healthy instances registered yet, so this node wins primary
    self.node.consul.client.health.service.return_value = ()
    manage.write_snapshot = mock.MagicMock(return_value=True)

    try:
        manage.health(self.node)
    except SystemExit:
        pass
    else:
        self.fail('Should have exited but did not.')

    first_run_setup = [
        mock.call.setup_root_user(True),
        mock.call.create_db(True),
        mock.call.create_default_user(True),
        mock.call.create_repl_user(True),
        mock.call.expire_root_password(True),
    ]
    self.node.mysql.assert_has_calls(first_run_setup)
    manage.write_snapshot.assert_called_once()
    self.assertEqual(self.node.cp.state, PRIMARY)
def test_failover_locked_another_node_is_primary(self):
    """ Given another node is running a failover, wait for that
    failover. Given this node is not marked primary, the node
    will not update its ContainerPilot config.
    """
    def query_results(*args, **kwargs):
        yield ()
        yield ()  # and after two hits we've set up replication
        yield [{
            'Master_Server_Id': 'node2',
            'Master_Host': '192.168.1.102'
        }]

    self.node.mysql = MySQL(envs=get_environ())
    self.node.mysql._conn = mock.MagicMock()
    self.node.mysql.query = mock.MagicMock(side_effect=query_results())
    self.node.mysql.failover = mock.MagicMock()

    # NOTE: exception instances yielded from a side_effect iterable are
    # raised by the mock rather than returned, so the first two
    # get_primary calls raise UnknownPrimary and the third succeeds
    def consul_get_primary_results(*args, **kwargs):
        yield UnknownPrimary()
        yield UnknownPrimary()
        yield ('node2', '192.168.1.102')

    # failover lock appears held on the first poll, released on the next
    def lock_sequence(*args, **kwargs):
        yield True
        yield False

    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    self.node.consul.put = mock.MagicMock()
    self.node.consul.get_primary = mock.MagicMock(
        side_effect=consul_get_primary_results())
    # this node loses the race for the failover lock
    self.node.consul.lock_failover = mock.MagicMock(return_value=False)
    self.node.consul.unlock_failover = mock.MagicMock()
    self.node.consul.is_locked = mock.MagicMock(
        side_effect=lock_sequence())

    with mock.patch('time.sleep'):  # cuts 3 sec from test run
        manage.on_change(self.node)

    self.assertEqual(self.node.consul.get_primary.call_count, 2)
    self.node.consul.lock_failover.assert_called_once()
    self.assertFalse(self.node.consul.client.health.service.called)
    # since we never held the lock, we must never release it,
    # never record a new primary, and never reload ContainerPilot
    self.assertFalse(self.node.consul.unlock_failover.called)
    self.assertFalse(self.node.consul.put.called)
    self.assertFalse(self.node.cp.reload.called)
    self.assertEqual(self.node.cp.state, REPLICA)
def test_backup_already_running(self):
    """ Don't snapshot if there's already a snapshot running """
    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    self.node.consul.client.session.create.return_value = 'xyzzy'
    with mock.patch('manage.write_snapshot') as ws:
        lockfile_name = '/tmp/' + BACKUP_LOCK_KEY
        # hold the flock ourselves so snapshot_task sees it as taken;
        # 'with open' guarantees the handle is closed even if flock or
        # snapshot_task raises (the old finally would NameError if
        # open() itself had failed)
        with open(lockfile_name, 'w') as backup_lock:
            try:
                fcntl.flock(backup_lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                manage.snapshot_task(self.node)
            finally:
                fcntl.flock(backup_lock, fcntl.LOCK_UN)
        self.assertFalse(ws.called)
def test_backup_unlocked(self):
    """ Make sure that if a snapshot has run that we unlock correctly. """
    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    self.node.consul.client.session.create.return_value = 'xyzzy'
    with mock.patch('manage.write_snapshot') as ws:
        lockfile_name = '/tmp/' + BACKUP_LOCK_KEY
        # hold the flock so the first snapshot_task call is a no-op;
        # 'with open' guarantees the handle is closed even if flock or
        # snapshot_task raises (the old finally would NameError if
        # open() itself had failed)
        with open(lockfile_name, 'w') as backup_lock:
            try:
                fcntl.flock(backup_lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                manage.snapshot_task(self.node)
            finally:
                fcntl.flock(backup_lock, fcntl.LOCK_UN)
        # the lock is released now, so this run must take the snapshot
        manage.snapshot_task(self.node)
        self.assertTrue(ws.called)
def test_pre_start_snapshot_incomplete(self):
    """ Given a snapshot that has been marked successful but not
    completed, a new node will wait and not crash.
    """
    self.node.consul = Consul(get_environ())
    self.node.consul.client = mock.MagicMock()

    def kv_responses(*args, **kwargs):
        # first poll: the snapshot record isn't readable yet
        yield pyconsul.ConsulException()
        # second poll: the record shows up and we can restore
        yield [0, {'Value': '{"id": "xxxx", "dt": "yyyyy"}'}]

    self.node.consul.client.kv.get.side_effect = kv_responses()

    manage.pre_start(self.node)

    self.node.snaps.get_backup.assert_called_once()
    self.assertEqual(self.node.consul.client.kv.get.call_count, 2)
    # we restored from the snapshot instead of initializing a fresh DB
    self.node.mysql.restore_from_snapshot.assert_called_once()
    self.assertFalse(self.node.mysql.initialize_db.called)
def test_replica_first_pass_primary_lockout(self):
    """ Given uninitialized node w/ no primary, then a health primary
    retry setting up as a replica
    """
    self.node.mysql.wait_for_connection.return_value = True
    self.node.mysql.get_primary.side_effect = UnknownPrimary()

    consul = Consul(envs=get_environ())
    consul.client = mock.MagicMock()
    # this node loses the race to mark itself primary...
    consul.mark_as_primary = mock.MagicMock(return_value=False)
    # ...and sees no healthy instances to replicate from
    consul.client.health.service.return_value = ()
    self.node.consul = consul

    logging.getLogger().setLevel(logging.CRITICAL)  # noisy
    self.assertRaises(SystemExit, manage.health, self.node)
    self.assertEqual(self.node.cp.state, UNASSIGNED)
def test_replica_first_pass(self):
    """ Given uninitialized node w/ a health primary, set up replication. """
    self.node.mysql = MySQL(envs=get_environ())
    self.node.mysql._conn = mock.MagicMock()
    self.node.mysql.query = mock.MagicMock()

    def query_responses(*args, **kwargs):
        # two empty results while replication isn't configured yet...
        yield ()
        yield ()
        # ...then 'show slave status' reports a running master
        yield [{
            'Master_Server_Id': 'node2',
            'Master_Host': '192.168.1.102'
        }]

    self.node.mysql.query.side_effect = query_responses()
    self.node.mysql.wait_for_connection = mock.MagicMock(return_value=True)
    self.node.mysql.setup_replication = mock.MagicMock(return_value=True)

    self.node.consul = Consul(envs=get_environ())
    self.node.consul.client = mock.MagicMock()
    # Consul already knows about a healthy primary on node2
    self.node.consul.client.health.service.return_value = [
        0,
        [{'Service': {'ID': 'node2', 'Address': '192.168.1.102'}}]
    ]

    manage.health(self.node)

    self.node.mysql.query.assert_has_calls([
        mock.call.query('show slave status'),
        mock.call.query('show slave hosts'),
        mock.call.query('show slave status'),
    ])
    self.assertEqual(self.node.consul.client.health.service.call_count, 2)
    manage.write_snapshot.assert_called_once()
    self.assertEqual(self.node.cp.state, REPLICA)
def test_parse_with_consul_agent(self):
    """ When CONSUL_AGENT is enabled, talk to the local agent. """
    self.environ['CONSUL_AGENT'] = '1'
    consul = Consul(self.environ)
    self.assertEqual('localhost', consul.host)