def is_all_peers_stopped(responses):
    """Establish whether all peers have stopped their proxy services.

    Each peer unit will set stop-proxy-service-ack to rq value to indicate that
    it has stopped its proxy service. We wait for all units to be stopped
    before triggering a sync. Peer services will be restarted once their rings
    are synced with the leader.

    To be safe, default expectation is that api is still running.

    :param responses: list of relation settings dicts collected from peer
                      units. May be empty.
    :returns: True only if the local unit has a stop request token, the first
              response acks that token and every response carries an equal
              ack. False otherwise, including when there are no responses.
    """
    if not responses:
        # Nothing to compare against; indexing responses[0] below would raise
        # IndexError, so fall back to the safe default of "not stopped".
        log("No peer responses available - assuming peers are not stopped",
            level=DEBUG)
        return False

    rq_key = SwiftProxyClusterRPC.KEY_STOP_PROXY_SVC
    ack_key = SwiftProxyClusterRPC.KEY_STOP_PROXY_SVC_ACK

    # The token this unit set with its own stop request.
    token = relation_get(attribute=rq_key, unit=local_unit())
    first_ack = responses[0].get(ack_key)
    if not token or token != first_ack:
        log("Token mismatch, rq and ack tokens differ (expected ack={}, "
            "got={})".format(token, first_ack), level=DEBUG)
        return False

    # The first ack matches our token, so equality across all responses means
    # every peer has acked that same token.
    if not all_responses_equal(responses, ack_key):
        log("Not all ack responses are equal. Either we are still waiting "
            "for responses or we were not the request originator.",
            level=DEBUG)
        return False

    return True
def cluster_leader_actions():
    """Handle a cluster relation change on the leader unit.

    If the settings returned by relation_get() carry a stop-proxy request, the
    local swift-proxy service is stopped, peers are notified of a leader
    change and the function returns. If they carry a stop-proxy ack, settings
    are gathered from every unit on the 'cluster' relation and, once all peers
    report stopped, the rings are broadcast.

    Raises SwiftProxyCharmException if peers disagree on 'peers-only'.

    NOTE: must be called by leader from cluster relation hook.
    """
    log("Cluster changed by unit=%s (local is leader)" % (remote_unit()),
        level=DEBUG)

    rel_settings = relation_get() or {}
    stop_ack_key = SwiftProxyClusterRPC.KEY_STOP_PROXY_SVC_ACK

    # Protect against leader changing mid-sync: a leader should not be on the
    # receiving end of a stop request.
    if rel_settings.get(SwiftProxyClusterRPC.KEY_STOP_PROXY_SVC):
        log("Sync request received yet this is leader unit. This would "
            "indicate that the leader has changed mid-sync - stopping proxy "
            "and notifying peers", level=ERROR)
        service_stop('swift-proxy')
        SwiftProxyClusterRPC().notify_leader_changed()
        return

    if stop_ack_key in rel_settings:
        ack_token = rel_settings[stop_ack_key]

        # Gather the settings of every unit on the cluster relation so we can
        # tell whether all of them have stopped.
        peer_settings = [relation_get(rid=rid, unit=unit)
                         for rid in relation_ids('cluster')
                         for unit in related_units(rid)]

        if not all_peers_stopped(peer_settings):
            log("Not all peer apis stopped - skipping sync until all peers "
                "ready (got %s)" % (peer_settings), level=INFO)
        else:
            key = 'peers-only'
            if not all_responses_equal(peer_settings, key, must_exist=False):
                raise SwiftProxyCharmException(
                    "Did not get equal response from every peer unit for "
                    "'%s'" % (key))

            peers_only = int(get_first_available_value(peer_settings, key,
                                                       default=0))
            log("Syncing rings and builders (peers-only=%s)" % (peers_only),
                level=DEBUG)
            broadcast_rings_available(ack_token, storage=not peers_only)

    # NOTE(review): original indentation was lost; this is assumed to run at
    # function level (i.e. on every path that does not return/raise above).
    CONFIGS.write_all()
def all_peers_stopped(responses):
    """Establish whether all peers have stopped their proxy services.

    Each peer unit will set stop-proxy-service-ack to rq value to indicate that
    it has stopped its proxy service. We wait for all units to be stopped
    before triggering a sync. Peer services will be restarted once their rings
    are synced with the leader.

    To be safe, default expectation is that api is still running.

    :param responses: list of relation settings dicts collected from peer
                      units. May be empty.
    :returns: True only if the local unit has a stop request token, the first
              response acks that token and every response carries an equal
              ack. False otherwise, including when there are no responses.
    """
    if not responses:
        # Without any response there is no ack to inspect; indexing
        # responses[0] below would raise IndexError, so report "not stopped".
        log("No peer responses available - assuming peers are not stopped",
            level=DEBUG)
        return False

    rq_key = SwiftProxyClusterRPC.KEY_STOP_PROXY_SVC
    ack_key = SwiftProxyClusterRPC.KEY_STOP_PROXY_SVC_ACK

    # The token this unit set with its own stop request.
    token = relation_get(attribute=rq_key, unit=local_unit())
    first_ack = responses[0].get(ack_key)
    if not token or token != first_ack:
        log("Unmatched token in ack (expected=%s, got=%s)" %
            (token, first_ack), level=DEBUG)
        return False

    # The first ack matches our token, so equality across all responses means
    # every peer has acked that same token.
    if not all_responses_equal(responses, ack_key):
        return False

    return True
def test_all_responses_equal(self):
    """Exercise all_responses_equal() for key 'b' over a table of cases."""
    # Each entry: (responses, extra keyword arguments, expected truthiness).
    scenarios = [
        # Key absent, but absence is tolerated.
        ([{'a': 1, 'c': 3}], {'must_exist': False}, True),
        # Key absent with default arguments.
        ([{'a': 1, 'c': 3}], {}, False),
        # Single response holding the key.
        ([{'a': 1, 'b': 2, 'c': 3}], {}, True),
        # Two identical responses.
        ([{'a': 1, 'b': 2, 'c': 3}, {'a': 1, 'b': 2, 'c': 3}], {}, True),
        # Responses differ only on an unrelated key.
        ([{'a': 1, 'b': 2, 'c': 3}, {'a': 2, 'b': 2, 'c': 3}], {}, True),
        # Responses differ on the key under test.
        ([{'a': 1, 'b': 2, 'c': 3}, {'a': 1, 'b': 3, 'c': 3}], {}, False),
    ]

    for responses, extra_kwargs, expected in scenarios:
        verdict = swift_utils.all_responses_equal(responses, 'b',
                                                  **extra_kwargs)
        check = self.assertTrue if expected else self.assertFalse
        check(verdict)
def test_all_responses_equal(self):
    """Verify all_responses_equal() results when comparing on key 'b'."""
    all_equal = swift_utils.all_responses_equal

    # Key 'b' is missing from the only response.
    self.assertTrue(all_equal([{'a': 1, 'c': 3}], 'b', must_exist=False))
    self.assertFalse(all_equal([{'a': 1, 'c': 3}], 'b'))

    # A lone response that has the key.
    self.assertTrue(all_equal([{'a': 1, 'b': 2, 'c': 3}], 'b'))

    # Two responses with the same value for 'b'; other keys may differ.
    self.assertTrue(all_equal([{'a': 1, 'b': 2, 'c': 3},
                               {'a': 1, 'b': 2, 'c': 3}], 'b'))
    self.assertTrue(all_equal([{'a': 1, 'b': 2, 'c': 3},
                               {'a': 2, 'b': 2, 'c': 3}], 'b'))

    # Two responses whose 'b' values differ.
    self.assertFalse(all_equal([{'a': 1, 'b': 2, 'c': 3},
                                {'a': 1, 'b': 3, 'c': 3}], 'b'))
def cluster_leader_actions():
    """Handle a cluster relation change on the leader unit.

    Compares the settings returned by relation_get() (rx) with this unit's own
    settings (tx) and then takes one of these paths:

    * leader-change notification present: retry the ring sync and return.
    * un-acked resync request present: sync rings to peers only, record the
      ack on the relation and return.
    * received ack equals our own stop request token: once all peers report
      stopped, broadcast the rings.
    * tokens indicate a leader change mid-sync: stop swift-proxy and raise.
    * otherwise: no sync action.

    Raises SwiftProxyCharmException if peers disagree on 'peers-only' or if a
    mid-sync leader change is detected.

    NOTE: must be called by leader from cluster relation hook.
    """
    log("Cluster changed by unit={} (local is leader)".format(remote_unit()),
        level=DEBUG)

    rpc = SwiftProxyClusterRPC
    remote = relation_get() or {}
    local = relation_get(unit=local_unit()) or {}

    remote_rq = remote.get(rpc.KEY_STOP_PROXY_SVC)
    remote_ack = remote.get(rpc.KEY_STOP_PROXY_SVC_ACK)
    local_rq = local.get(rpc.KEY_STOP_PROXY_SVC)
    local_ack = local.get(rpc.KEY_STOP_PROXY_SVC_ACK)

    leader_change_token = remote.get(rpc.KEY_NOTIFY_LEADER_CHANGED)
    if leader_change_token:
        log("Leader change notification received and this is leader so "
            "retrying sync.", level=INFO)
        # FIXME: check that we were previously part of a successful sync to
        #        ensure we have good rings.
        cluster_sync_rings(peers_only=local.get('peers-only', False),
                           token=leader_change_token)
        return

    resync_token = remote.get(rpc.KEY_REQUEST_RESYNC)
    resync_ack_key = rpc.KEY_REQUEST_RESYNC_ACK
    acked_resync = local.get(resync_ack_key)
    # Only act on a resync request we have not already acknowledged.
    if resync_token and acked_resync != resync_token:
        log("Unit '{}' has requested a resync".format(remote_unit()),
            level=INFO)
        cluster_sync_rings(peers_only=True)
        relation_set(**{resync_ack_key: resync_token})
        return

    # The received ack answers the stop request this unit issued.
    acked_our_request = bool(remote_ack and remote_ack == local_rq)
    # The received ack matches an ack we sent ourselves, or the remote unit
    # both requested and acked the same token: treated as the leader having
    # changed while a sync was in flight.
    leader_changed_mid_sync = bool(
        (remote_ack and remote_ack == local_ack) or
        (remote_rq and remote_rq == remote_ack))

    if acked_our_request:
        # Gather the settings of every unit on the cluster relation so we can
        # tell whether all of them have stopped.
        peer_settings = [relation_get(rid=rid, unit=unit)
                         for rid in relation_ids('cluster')
                         for unit in related_units(rid)]

        if is_all_peers_stopped(peer_settings):
            key = 'peers-only'
            if not all_responses_equal(peer_settings, key, must_exist=False):
                raise SwiftProxyCharmException(
                    "Did not get equal response from every peer unit for "
                    "'{}'".format(key))

            peers_only = bool(
                get_first_available_value(peer_settings, key, default=0))
            log("Syncing rings and builders (peers-only={})".format(
                peers_only), level=DEBUG)
            broadcast_rings_available(broker_token=remote_ack,
                                      storage=not peers_only)
        else:
            key = rpc.KEY_STOP_PROXY_SVC_ACK
            acks = ', '.join(rsp[key] for rsp in peer_settings if key in rsp)
            log("Not all peer apis stopped - skipping sync until all peers "
                "ready (current='{}', token='{}')".format(acks, local_ack),
                level=INFO)
    elif leader_changed_mid_sync:
        log("It appears that the cluster leader has changed mid-sync - "
            "stopping proxy service", level=WARNING)
        service_stop('swift-proxy')

        previous_broker = remote.get('builder-broker')
        if not previous_broker:
            raise SwiftProxyCharmException(
                "No builder-broker on rx_settings relation from '{}' - "
                "unable to attempt leader restore".format(remote_unit()))

        # If we get here, manual intervention will be required in order
        # to restore the cluster.
        raise SwiftProxyCharmException(
            "Failed to restore previous broker '{}' as leader".format(
                previous_broker))
    else:
        log("Not taking any sync actions", level=DEBUG)

    # NOTE(review): original indentation was lost; this is assumed to run at
    # function level (i.e. on every path that does not return/raise above).
    CONFIGS.write_all()