def test_leaving(self, client):
    """Create a synchroniser for a node that is not in the cluster and try
    to leave the cluster. Nothing should be written to etcd."""
    e = EtcdSynchronizer(self.plugin, self.watcher_ip)
    e.start_thread()
    e.leave_cluster()
    e._client.write.assert_not_called()
    e.terminate()
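# A minimal sketch of the fixture test_leaving assumes (the patch target,
# plugin stub, and IP are illustrative assumptions, not the repo's actual
# setUp): the etcd client class is patched with a MagicMock, so the
# synchroniser's _client is a mock whose write() calls can be asserted,
# and self.watcher_ip does not appear in the cluster state.
# (requires: from mock import patch, MagicMock)
def setUp(self):
    self.patcher = patch("etcd.Client", MagicMock())  # hypothetical patch target
    self.patcher.start()
    self.plugin = DummyPlugin(None)
    self.watcher_ip = "10.0.0.4"  # hypothetical IP absent from the cluster

def tearDown(self):
    self.patcher.stop()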
def test_scale_down(self):
    # Start with a stable cluster of two nodes
    sync1 = EtcdSynchronizer(DummyPlugin(None), '10.0.1.1')
    sync2 = EtcdSynchronizer(DummyPlugin(None), '10.0.1.2')
    mock_client = sync1._client
    mock_client.write("/test", json.dumps({"10.0.1.1": "normal",
                                           "10.0.1.2": "normal"}))
    for s in [sync1, sync2]:
        s.start_thread()

    # Make the second node leave
    sync2.leave_cluster()
    sync2.thread.join(20)
    sync2.terminate()

    self.wait_for_all_normal(mock_client, required_number=1)

    # Check that it's left and the cluster is stable
    end = json.loads(mock_client.read("/test").value)
    self.assertEqual(None, end.get("10.0.1.2"))
    self.assertEqual("normal", end.get("10.0.1.1"))
    sync1.terminate()
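# test_scale_down and test_failure both rely on a wait_for_all_normal
# helper. A plausible sketch of it, assuming it polls the "/test" key
# until the expected number of nodes all report "normal" (an assumed
# implementation, not the repo's actual helper):
def wait_for_all_normal(self, client, required_number=-1, tries=20):
    for _ in range(tries):
        cluster = json.loads(client.read("/test").value)
        if (required_number in (-1, len(cluster)) and
                all(state == "normal" for state in cluster.values())):
            return
        sleep(1)
    self.fail("Cluster failed to stabilise: %s" % cluster)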
def test_failure(self):
    # Create synchronisers, using a FailPlugin for one which will crash and
    # not complete (simulating a failed node)
    sync1 = EtcdSynchronizer(DummyPlugin(None), '10.0.0.1')
    sync2 = EtcdSynchronizer(FailPlugin(None), '10.0.0.2')
    sync3 = EtcdSynchronizer(DummyPlugin(None), '10.0.0.3')
    mock_client = sync1._client
    for s in [sync1, sync2, sync3]:
        s.start_thread()

    # After a few seconds, the scale-up will still not have completed
    sleep(3)
    end = json.loads(mock_client.read("/test").value)
    self.assertNotEqual("normal", end.get("10.0.0.1"))
    self.assertNotEqual("normal", end.get("10.0.0.2"))
    self.assertNotEqual("normal", end.get("10.0.0.3"))

    # Start a synchroniser to take 10.0.0.2's place
    sync2.terminate()
    error_syncer = EtcdSynchronizer(NullPlugin('/test'), '10.0.0.2',
                                    force_leave=True)
    error_syncer.mark_node_failed()
    error_syncer.leave_cluster()
    error_syncer.start_thread()

    # 10.0.0.2 will be removed from the cluster, and the cluster will
    # stabilise
    self.wait_for_all_normal(mock_client, required_number=2, tries=50)
    end = json.loads(mock_client.read("/test").value)
    self.assertEqual("normal", end.get("10.0.0.1"))
    self.assertEqual("normal", end.get("10.0.0.3"))
    self.assertEqual(None, end.get("10.0.0.2"))
    for s in [sync1, sync3, error_syncer]:
        s.terminate()
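# Sketches of the plugin stubs used above, assuming the synchroniser
# drives plugins through callbacks during cluster changes. The method
# names are illustrative assumptions only: DummyPlugin completes every
# transition, while FailPlugin never completes one, so its node never
# reaches "normal" (simulating a crashed node).
class DummyPlugin(object):
    def __init__(self, ip):
        pass
    def on_cluster_changing(self, cluster_view):
        pass  # completes immediately, so the node converges to "normal"

class FailPlugin(object):
    def __init__(self, ip):
        pass
    def on_cluster_changing(self, cluster_view):
        raise Exception("Simulated plugin failure")  # node never completes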
                    level=logging.DEBUG)

local_ip = sys.argv[1]
site = sys.argv[2]
node_type = sys.argv[3]
datastore = sys.argv[4]
dead_node_ip = sys.argv[5]

key = make_key(site, node_type, datastore)
logging.info("Using etcd key %s" % (key))

error_syncer = EtcdSynchronizer(NullPlugin(key), dead_node_ip,
                                etcd_ip=local_ip, force_leave=True)

print "Marking node as failed and removing it from the cluster - will take at least 30 seconds"

# Move the dead node into ERROR state to allow in-progress operations to
# complete
error_syncer.mark_node_failed()

# Move the dead node out of the cluster
error_syncer.start_thread()
error_syncer.leave_cluster()

# Wait for it to leave
error_syncer.thread.join()

print "Process complete - %s has left the cluster" % dead_node_ip

c = etcd.Client(local_ip, 4000)
new_state = c.get(key).value
logging.info("New etcd state (after removing %s) is %s" % (dead_node_ip, new_state))
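# For reference, a hedged sketch of make_key: the exact key layout is an
# assumption inferred from its arguments, not the repo's actual scheme.
def make_key(site, node_type, datastore):
    return "/clearwater/%s/%s/clustering/%s" % (site, node_type, datastore)

# The script takes five positional arguments, so it would be invoked
# along these lines (values are illustrative):
#   python mark_node_failed.py 10.0.0.1 site1 sprout memento 10.0.0.2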
                                    etcd_ip=local_ip, force_leave=True)
except ImportError:
    print "You must run mark_node_failed on a node that has Cassandra installed to remove a node from a Cassandra cluster"
    sys.exit(1)
else:
    error_syncer = EtcdSynchronizer(NullPlugin(key), dead_node_ip,
                                    etcd_ip=local_ip, force_leave=True)

print "Marking node as failed and removing it from the cluster - will take at least 30 seconds"

# Move the dead node into ERROR state to allow in-progress operations to
# complete
error_syncer.mark_node_failed()

# Move the dead node out of the cluster
error_syncer.start_thread()
error_syncer.leave_cluster()

# Wait for it to leave
error_syncer.thread.join()

print "Process complete - %s has left the cluster" % dead_node_ip

c = etcd.Client(local_ip, 4000)
new_state = c.get(key).value
logging.info("New etcd state (after removing %s) is %s" % (dead_node_ip, new_state))
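# NullPlugin, used above for nodes that need no datastore-specific
# decommissioning work, plausibly just carries the etcd key and ignores
# all cluster callbacks. This sketch is an assumption, not the actual
# class:
class NullPlugin(object):
    def __init__(self, key):
        self._key = key
    def key(self):
        return self._key
    def on_cluster_changing(self, cluster_view):
        pass  # no datastore-specific work to do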