def test_manual_failover_from_leader(self): self.ha.has_lock = true self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', '')) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, '', MockPostgresql.name)) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, '', 'blabla')) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') f = Failover(0, MockPostgresql.name, '') self.ha.cluster = get_cluster_initialized_with_leader(f) self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself') self.ha.fetch_node_status = lambda e: (e, True, True, 0, { 'nofailover': 'True' }) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') # manual failover from the previous leader to us won't happen if we hold the nofailover flag self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', MockPostgresql.name)) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock')
def test_manual_failover_from_leader(self): self.ha.fetch_node_status = get_node_status() self.ha.has_lock = true self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', '', None)) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, '', self.p.name, None)) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, '', 'blabla', None)) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') f = Failover(0, self.p.name, '', None) self.ha.cluster = get_cluster_initialized_with_leader(f) self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself') self.ha.fetch_node_status = get_node_status(nofailover=True) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') self.ha.fetch_node_status = get_node_status(xlog_location=1) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') # manual failover from the previous leader to us won't happen if we hold the nofailover flag self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', self.p.name, None)) self.assertEquals(self.ha.run_cycle(), 'no action. i am the leader with the lock') # Failover scheduled time must include timezone scheduled = datetime.datetime.now() self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', self.p.name, scheduled)) self.ha.run_cycle() scheduled = datetime.datetime.utcnow().replace(tzinfo=tzutc) self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', self.p.name, scheduled)) self.assertEquals('no action. i am the leader with the lock', self.ha.run_cycle()) scheduled = scheduled + datetime.timedelta(seconds=30) self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', self.p.name, scheduled)) self.assertEquals('no action. i am the leader with the lock', self.ha.run_cycle()) scheduled = scheduled + datetime.timedelta(seconds=-600) self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', self.p.name, scheduled)) self.assertEquals('no action. i am the leader with the lock', self.ha.run_cycle()) scheduled = None self.ha.cluster = get_cluster_initialized_with_leader( Failover(0, 'blabla', self.p.name, scheduled)) self.assertEquals('no action. i am the leader with the lock', self.ha.run_cycle())
def test_manual_failover_from_leader_in_pause(self): self.ha.has_lock = true self.ha.is_paused = true scheduled = datetime.datetime.now() self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled)) self.assertEquals('PAUSE: no action. i am the leader with the lock', self.ha.run_cycle()) self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, '', None)) self.assertEquals('PAUSE: no action. i am the leader with the lock', self.ha.run_cycle())
def test_manual_failover_from_leader_in_synchronous_mode(self): self.p.is_leader = true self.ha.has_lock = true self.ha.is_synchronous_mode = true self.ha.is_failover_possible = false self.ha.process_sync_replication = Mock() self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, None)) self.assertEquals('no action. i am the leader with the lock', self.ha.run_cycle()) self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, 'a')) self.ha.is_failover_possible = true self.assertEquals('manual failover: demoting myself', self.ha.run_cycle())
def test_manual_failover_process_no_leader_in_pause(self): self.ha.is_paused = true self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'other', None)) self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock') self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', '', None)) self.assertEquals(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock') self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', 'blabla', None)) self.assertEquals('PAUSE: acquired session lock as a leader', self.ha.run_cycle()) self.p.is_leader = false self.p.set_role('replica') self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', self.p.name, None)) self.assertEquals(self.ha.run_cycle(), 'PAUSE: promoted self to leader by acquiring session lock')
def _load_cluster(self): try: result = self.retry(self._client.read, self.client_path(''), recursive=True) nodes = {os.path.relpath(node.key, result.key): node for node in result.leaves} # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize.value # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation.value) # get list of members members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1] # get leader leader = nodes.get(self._LEADER) if leader: member = Member(-1, leader.value, None, {}) member = ([m for m in members if m.name == leader.value] or [member])[0] leader = Leader(leader.modifiedIndex, leader.ttl, member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover.modifiedIndex, failover.value) self._cluster = Cluster(initialize, leader, last_leader_operation, members, failover) except etcd.EtcdKeyNotFound: self._cluster = Cluster(False, None, None, [], None) except: logger.exception('get_cluster') raise EtcdError('Etcd is not responding properly')
def _load_cluster(self): try: path = self.client_path('/') _, results = self._client.kv.get(path, recurse=True) if results is None: raise NotFound nodes = {} for node in results: node['Value'] = (node['Value'] or b'').decode('utf-8') nodes[os.path.relpath(node['Key'], path)] = node # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize['Value'] # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int( last_leader_operation['Value']) # get list of members members = [ self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1 ] # get leader leader = nodes.get(self._LEADER) if leader and leader[ 'Value'] == self._name and self._session != leader.get( 'Session', 'x'): logger.info( 'I am leader but not owner of the session. Removing leader node' ) self._client.kv.delete(self.leader_path, cas=leader['ModifyIndex']) leader = None if leader: member = Member(-1, leader['Value'], None, {}) member = ([m for m in members if m.name == leader['Value']] or [member])[0] leader = Leader(leader['ModifyIndex'], leader.get('Session'), member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover['ModifyIndex'], failover['Value']) self._cluster = Cluster(initialize, leader, last_leader_operation, members, failover) except NotFound: self._cluster = Cluster(False, None, None, [], None) except: logger.exception('get_cluster') raise ConsulError('Consul is not responding properly')
def _load_cluster(self): try: path = self.client_path('/') _, results = self.retry(self._client.kv.get, path, recurse=True) if results is None: raise NotFound nodes = {} for node in results: node['Value'] = (node['Value'] or b'').decode('utf-8') nodes[os.path.relpath(node['Key'], path)] = node # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize['Value'] # get global dynamic configuration config = nodes.get(self._CONFIG) config = config and ClusterConfig.from_node(config['ModifyIndex'], config['Value']) # get timeline history history = nodes.get(self._HISTORY) history = history and TimelineHistory.from_node(history['ModifyIndex'], history['Value']) # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation['Value']) # get list of members members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1] # get leader leader = nodes.get(self._LEADER) if not self._ctl and leader and leader['Value'] == self._name \ and self._session != leader.get('Session', 'x'): logger.info('I am leader but not owner of the session. Removing leader node') self._client.kv.delete(self.leader_path, cas=leader['ModifyIndex']) leader = None if leader: member = Member(-1, leader['Value'], None, {}) member = ([m for m in members if m.name == leader['Value']] or [member])[0] leader = Leader(leader['ModifyIndex'], leader.get('Session'), member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover['ModifyIndex'], failover['Value']) # get synchronization state sync = nodes.get(self._SYNC) sync = SyncState.from_node(sync and sync['ModifyIndex'], sync and sync['Value']) self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history) except NotFound: self._cluster = Cluster(None, None, None, None, [], None, None, None) except Exception: logger.exception('get_cluster') raise ConsulError('Consul is not responding properly')
def _load_cluster(self): try: path = self.client_path('/') _, results = self.retry(self._client.kv.get, path, recurse=True) if results is None: raise NotFound nodes = {} for node in results: node['Value'] = (node['Value'] or b'').decode('utf-8') nodes[node['Key'][len(path):].lstrip('/')] = node # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize['Value'] # get global dynamic configuration config = nodes.get(self._CONFIG) config = config and ClusterConfig.from_node(config['ModifyIndex'], config['Value']) # get timeline history history = nodes.get(self._HISTORY) history = history and TimelineHistory.from_node(history['ModifyIndex'], history['Value']) # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation['Value']) # get list of members members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1] # get leader leader = nodes.get(self._LEADER) if not self._ctl and leader and leader['Value'] == self._name \ and self._session != leader.get('Session', 'x'): logger.info('I am leader but not owner of the session. Removing leader node') self._client.kv.delete(self.leader_path, cas=leader['ModifyIndex']) leader = None if leader: member = Member(-1, leader['Value'], None, {}) member = ([m for m in members if m.name == leader['Value']] or [member])[0] leader = Leader(leader['ModifyIndex'], leader.get('Session'), member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover['ModifyIndex'], failover['Value']) # get synchronization state sync = nodes.get(self._SYNC) sync = SyncState.from_node(sync and sync['ModifyIndex'], sync and sync['Value']) self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history) except NotFound: self._cluster = Cluster(None, None, None, None, [], None, None, None) except Exception: logger.exception('get_cluster') raise ConsulError('Consul is not responding properly')
def test_manual_failover_while_starting(self): self.ha.has_lock = true self.p.check_for_startup = true f = Failover(0, self.p.name, '', None) self.ha.cluster = get_cluster_initialized_with_leader(f) self.ha.fetch_node_status = get_node_status() # accessible, in_recovery self.assertEquals(self.ha.run_cycle(), 'manual failover: demoting myself')
def _load_cluster(self): try: result = self.retry(self.client.read, self.client_path(''), recursive=True) nodes = {os.path.relpath(node.key, result.key): node for node in result.leaves} # get initialize flag initialize = nodes.get(self._INITIALIZE, None) initialize = initialize and initialize.value # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME, None) last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation.value) # get list of members members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1] # get leader leader = nodes.get(self._LEADER, None) if leader: member = Member(-1, leader.value, None, {}) member = ([m for m in members if m.name == leader.value] or [member])[0] leader = Leader(leader.modifiedIndex, leader.ttl, member) # failover key failover = nodes.get(self._FAILOVER, None) if failover: failover = Failover.from_node(failover.modifiedIndex, failover.value) self._cluster = Cluster(initialize, leader, last_leader_operation, members, failover) except etcd.EtcdKeyNotFound: self._cluster = Cluster(False, None, None, [], None) except: logger.exception('get_cluster') raise EtcdError('Etcd is not responding properly')
def test_process_healthy_cluster_in_pause(self): self.p.is_leader = false self.ha.is_paused = true self.p.name = 'leader' self.ha.cluster = get_cluster_initialized_with_leader() self.assertEquals(self.ha.run_cycle(), 'PAUSE: removed leader lock because postgres is not running as master') self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', self.p.name, None)) self.assertEquals(self.ha.run_cycle(), 'PAUSE: waiting to become master after promote...')
def _load_cluster(self): try: result = self.retry(self._client.read, self.client_path(''), recursive=True) nodes = { os.path.relpath(node.key, result.key): node for node in result.leaves } # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize.value # get global dynamic configuration config = nodes.get(self._CONFIG) config = config and ClusterConfig.from_node( config.modifiedIndex, config.value) # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int( last_leader_operation.value) # get list of members members = [ self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1 ] # get leader leader = nodes.get(self._LEADER) if leader: member = Member(-1, leader.value, None, {}) member = ([m for m in members if m.name == leader.value] or [member])[0] index = result.etcd_index if result.etcd_index > leader.modifiedIndex else leader.modifiedIndex + 1 leader = Leader(index, leader.ttl, member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover.modifiedIndex, failover.value) # get synchronization state sync = nodes.get(self._SYNC) sync = SyncState.from_node(sync and sync.modifiedIndex, sync and sync.value) self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync) except etcd.EtcdKeyNotFound: self._cluster = Cluster(None, None, None, None, [], None, None) except: logger.exception('get_cluster') raise EtcdError('Etcd is not responding properly')
def _inner_load_cluster(self): self._fetch_cluster = False self.event.clear() nodes = set( self.get_children(self.client_path(''), self.cluster_watcher)) if not nodes: self._fetch_cluster = True # get initialize flag initialize = (self.get_node(self.initialize_path) or [None])[0] if self._INITIALIZE in nodes else None # get global dynamic configuration config = self.get_node( self.config_path, watch=self.cluster_watcher) if self._CONFIG in nodes else None config = config and ClusterConfig.from_node(config[1].version, config[0], config[1].mzxid) # get last leader operation last_leader_operation = self._OPTIME in nodes and self._fetch_cluster and self.get_node( self.leader_optime_path) last_leader_operation = last_leader_operation and int( last_leader_operation[0]) or 0 # get list of members members = self.load_members() if self._MEMBERS[:-1] in nodes else [] # get leader leader = self.get_node( self.leader_path) if self._LEADER in nodes else None if leader: client_id = self._client.client_id if not self._ctl and leader[0] == self._name and client_id is not None \ and client_id[0] != leader[1].ephemeralOwner: logger.info( 'I am leader but not owner of the session. Removing leader node' ) self._client.delete(self.leader_path) leader = None if leader: member = Member(-1, leader[0], None, {}) member = ([m for m in members if m.name == leader[0]] or [member])[0] leader = Leader(leader[1].version, leader[1].ephemeralOwner, member) self._fetch_cluster = member.index == -1 # failover key failover = self.get_node( self.failover_path, watch=self.cluster_watcher) if self._FAILOVER in nodes else None failover = failover and Failover.from_node(failover[1].version, failover[0]) self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover)
def test_manual_failover_process_no_leader(self): self.p.is_leader = false self.ha.cluster = get_cluster_initialized_without_leader( failover=Failover(0, '', self.p.name, None)) self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock') self.ha.cluster = get_cluster_initialized_without_leader( failover=Failover(0, '', 'leader', None)) self.p.set_role('replica') self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock') self.ha.fetch_node_status = lambda e: (e, True, True, 0, {} ) # accessible, in_recovery self.assertEquals( self.ha.run_cycle(), 'following a different leader because i am not the healthiest node' ) self.ha.cluster = get_cluster_initialized_without_leader( failover=Failover(0, self.p.name, '', None)) self.assertEquals( self.ha.run_cycle(), 'following a different leader because i am not the healthiest node' ) self.ha.fetch_node_status = lambda e: (e, False, True, 0, {} ) # inaccessible, in_recovery self.p.set_role('replica') self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock') # set failover flag to True for all members of the cluster # this should elect the current member, as we are not going to call the API for it. self.ha.cluster = get_cluster_initialized_without_leader( failover=Failover(0, '', 'other', None)) self.ha.fetch_node_status = lambda e: (e, True, True, 0, { 'nofailover': 'True' }) # accessible, in_recovery self.p.set_role('replica') self.assertEquals(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock') # same as previous, but set the current member to nofailover. In no case it should be elected as a leader self.ha.patroni.nofailover = True self.assertEquals( self.ha.run_cycle(), 'following a different leader because I am not allowed to promote')
def _load_cluster(self): prefix = self.client_path('') response = self._sync_obj.get(prefix, recursive=True) if not response: return Cluster(None, None, None, None, [], None, None, None) nodes = { os.path.relpath(key, prefix).replace('\\', '/'): value for key, value in response.items() } # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize['value'] # get global dynamic configuration config = nodes.get(self._CONFIG) config = config and ClusterConfig.from_node(config['index'], config['value']) # get timeline history history = nodes.get(self._HISTORY) history = history and TimelineHistory.from_node( history['index'], history['value']) # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int( last_leader_operation['value']) # get list of members members = [ self.member(k, n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1 ] # get leader leader = nodes.get(self._LEADER) if leader: member = Member(-1, leader['value'], None, {}) member = ([m for m in members if m.name == leader['value']] or [member])[0] leader = Leader(leader['index'], None, member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover['index'], failover['value']) # get synchronization state sync = nodes.get(self._SYNC) sync = SyncState.from_node(sync and sync['index'], sync and sync['value']) return Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history)
def _inner_load_cluster(self): self._fetch_cluster = False self.event.clear() nodes = set(self.get_children(self.client_path(''), self.cluster_watcher)) if not nodes: self._fetch_cluster = True # get initialize flag initialize = (self.get_node(self.initialize_path) or [None])[0] if self._INITIALIZE in nodes else None # get global dynamic configuration config = self.get_node(self.config_path, watch=self.cluster_watcher) if self._CONFIG in nodes else None config = config and ClusterConfig.from_node(config[1].version, config[0], config[1].mzxid) # get timeline history history = self.get_node(self.history_path, watch=self.cluster_watcher) if self._HISTORY in nodes else None history = history and TimelineHistory.from_node(history[1].mzxid, history[0]) # get synchronization state sync = self.get_node(self.sync_path, watch=self.cluster_watcher) if self._SYNC in nodes else None sync = SyncState.from_node(sync and sync[1].version, sync and sync[0]) # get list of members sync_standby = sync.leader == self._name and sync.members or [] members = self.load_members(sync_standby) if self._MEMBERS[:-1] in nodes else [] # get leader leader = self.get_node(self.leader_path) if self._LEADER in nodes else None if leader: client_id = self._client.client_id if not self._ctl and leader[0] == self._name and client_id is not None \ and client_id[0] != leader[1].ephemeralOwner: logger.info('I am leader but not owner of the session. Removing leader node') self._client.delete(self.leader_path) leader = None if leader: member = Member(-1, leader[0], None, {}) member = ([m for m in members if m.name == leader[0]] or [member])[0] leader = Leader(leader[1].version, leader[1].ephemeralOwner, member) self._fetch_cluster = member.index == -1 # get last leader operation last_leader_operation = self._OPTIME in nodes and self.get_leader_optime(leader) # failover key failover = self.get_node(self.failover_path, watch=self.cluster_watcher) if self._FAILOVER in nodes else None failover = failover and Failover.from_node(failover[1].version, failover[0]) return Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history)
def _inner_load_cluster(self): self._fetch_cluster = False self.event.clear() nodes = set( self.get_children(self.client_path(''), self.cluster_watcher)) if not nodes: self._fetch_cluster = True # get initialize flag initialize = (self.get_node(self.initialize_path) or [None])[0] if self._INITIALIZE in nodes else None # get list of members members = self.load_members() if self._MEMBERS[:-1] in nodes else [] # get leader leader = self.get_node( self.leader_path) if self._LEADER in nodes else None if leader: client_id = self._client.client_id if leader[0] == self._name and client_id is not None and client_id[ 0] != leader[1].ephemeralOwner: logger.info( 'I am leader but not owner of the session. Removing leader node' ) self._client.delete(self.leader_path) leader = None if leader: member = Member(-1, leader[0], None, {}) member = ([m for m in members if m.name == leader[0]] or [member])[0] leader = Leader(leader[1].version, leader[1].ephemeralOwner, member) self._fetch_cluster = member.index == -1 # failover key failover = self.get_node( self.failover_path, watch=self.cluster_watcher) if self._FAILOVER in nodes else None if failover: failover = Failover.from_node(failover[1].version, failover[0]) # get last leader operation optime = self.get_node( self.leader_optime_path ) if self._OPTIME in nodes and self._fetch_cluster else None self._last_leader_operation = 0 if optime is None else int(optime[0]) self._cluster = Cluster(initialize, leader, self._last_leader_operation, members, failover)
def _load_cluster(self): try: path = self.client_path('/') _, results = self._client.kv.get(path, recurse=True) if results is None: raise NotFound nodes = {} for node in results: node['Value'] = (node['Value'] or b'').decode('utf-8') nodes[os.path.relpath(node['Key'], path)] = node # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize['Value'] # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation['Value']) # get list of members members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1] # get leader leader = nodes.get(self._LEADER) if leader and leader['Value'] == self._name and self._session != leader.get('Session', 'x'): logger.info('I am leader but not owner of the session. Removing leader node') self._client.kv.delete(self.leader_path, cas=leader['ModifyIndex']) leader = None if leader: member = Member(-1, leader['Value'], None, {}) member = ([m for m in members if m.name == leader['Value']] or [member])[0] leader = Leader(leader['ModifyIndex'], leader.get('Session'), member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover['ModifyIndex'], failover['Value']) self._cluster = Cluster(initialize, leader, last_leader_operation, members, failover) except NotFound: self._cluster = Cluster(False, None, None, [], None) except: logger.exception('get_cluster') raise ConsulError('Consul is not responding properly')
def _load_cluster(self): try: result = self.retry(self._client.read, self.client_path(''), recursive=True) nodes = {os.path.relpath(node.key, result.key): node for node in result.leaves} # get initialize flag initialize = nodes.get(self._INITIALIZE) initialize = initialize and initialize.value # get global dynamic configuration config = nodes.get(self._CONFIG) config = config and ClusterConfig.from_node(config.modifiedIndex, config.value) # get last leader operation last_leader_operation = nodes.get(self._LEADER_OPTIME) last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation.value) # get list of members members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1] # get leader leader = nodes.get(self._LEADER) if leader: member = Member(-1, leader.value, None, {}) member = ([m for m in members if m.name == leader.value] or [member])[0] index = result.etcd_index if result.etcd_index > leader.modifiedIndex else leader.modifiedIndex + 1 leader = Leader(index, leader.ttl, member) # failover key failover = nodes.get(self._FAILOVER) if failover: failover = Failover.from_node(failover.modifiedIndex, failover.value) # get synchronization state sync = nodes.get(self._SYNC) sync = SyncState.from_node(sync and sync.modifiedIndex, sync and sync.value) self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync) except etcd.EtcdKeyNotFound: self._cluster = Cluster(None, None, None, None, [], None, None) except: logger.exception('get_cluster') raise EtcdError('Etcd is not responding properly')
def _inner_load_cluster(self): self._fetch_cluster = False self.event.clear() nodes = set(self.get_children(self.client_path(''), self.cluster_watcher)) if not nodes: self._fetch_cluster = True # get initialize flag initialize = (self.get_node(self.initialize_path) or [None])[0] if self._INITIALIZE in nodes else None # get global dynamic configuration config = self.get_node(self.config_path, watch=self.cluster_watcher) if self._CONFIG in nodes else None config = config and ClusterConfig.from_node(config[1].version, config[0], config[1].mzxid) # get list of members members = self.load_members() if self._MEMBERS[:-1] in nodes else [] # get leader leader = self.get_node(self.leader_path) if self._LEADER in nodes else None if leader: client_id = self._client.client_id if leader[0] == self._name and client_id is not None and client_id[0] != leader[1].ephemeralOwner: logger.info('I am leader but not owner of the session. Removing leader node') self._client.delete(self.leader_path) leader = None if leader: member = Member(-1, leader[0], None, {}) member = ([m for m in members if m.name == leader[0]] or [member])[0] leader = Leader(leader[1].version, leader[1].ephemeralOwner, member) self._fetch_cluster = member.index == -1 # failover key failover = self.get_node(self.failover_path, watch=self.cluster_watcher) if self._FAILOVER in nodes else None failover = failover and Failover.from_node(failover[1].version, failover[0]) # get last leader operation optime = self.get_node(self.leader_optime_path) if self._OPTIME in nodes and self._fetch_cluster else None self._last_leader_operation = 0 if optime is None else int(optime[0]) self._cluster = Cluster(initialize, config, leader, self._last_leader_operation, members, failover)
def _load_cluster(self): try: # get list of members response = self.retry(self._api.list_namespaced_pod, self._namespace, label_selector=self._label_selector) members = [self.member(pod) for pod in response.items] response = self.retry(self._api.list_namespaced_kind, self._namespace, label_selector=self._label_selector) nodes = {item.metadata.name: item for item in response.items} config = nodes.get(self.config_path) metadata = config and config.metadata annotations = metadata and metadata.annotations or {} # get initialize flag initialize = annotations.get(self._INITIALIZE) # get global dynamic configuration config = ClusterConfig.from_node( metadata and metadata.resource_version, annotations.get(self._CONFIG) or '{}') # get timeline history history = TimelineHistory.from_node( metadata and metadata.resource_version, annotations.get(self._HISTORY) or '[]') leader = nodes.get(self.leader_path) metadata = leader and leader.metadata self._leader_resource_version = metadata.resource_version if metadata else None self._leader_observed_subsets = leader.subsets if self.__subsets and leader else [] annotations = metadata and metadata.annotations or {} # get last leader operation last_leader_operation = annotations.get(self._OPTIME) last_leader_operation = 0 if last_leader_operation is None else int( last_leader_operation) # get leader leader_record = { n: annotations.get(n) for n in (self._LEADER, 'acquireTime', 'ttl', 'renewTime', 'transitions') if n in annotations } if (leader_record or self._leader_observed_record ) and leader_record != self._leader_observed_record: self._leader_observed_record = leader_record self._leader_observed_time = time.time() leader = leader_record.get(self._LEADER) try: ttl = int(leader_record.get('ttl')) or self._ttl except (TypeError, ValueError): ttl = self._ttl if not metadata or not self._leader_observed_time or self._leader_observed_time + ttl < time.time( ): leader = None if metadata: member = Member(-1, leader, None, {}) member = ([m for m in members if m.name == leader] or [member])[0] leader = Leader(response.metadata.resource_version, None, member) # failover key failover = nodes.get(self.failover_path) metadata = failover and failover.metadata failover = Failover.from_node( metadata and metadata.resource_version, metadata and metadata.annotations) # get synchronization state sync = nodes.get(self.sync_path) metadata = sync and sync.metadata sync = SyncState.from_node(metadata and metadata.resource_version, metadata and metadata.annotations) self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history) except Exception: logger.exception('get_cluster') raise KubernetesError('Kubernetes API is not responding properly')
def _load_cluster(self): try: # get list of members response = self.retry(self._api.list_namespaced_pod, self._namespace, label_selector=self._label_selector) members = [self.member(pod) for pod in response.items] response = self.retry(self._api.list_namespaced_kind, self._namespace, label_selector=self._label_selector) nodes = {item.metadata.name: item for item in response.items} config = nodes.get(self.config_path) metadata = config and config.metadata annotations = metadata and metadata.annotations or {} # get initialize flag initialize = annotations.get(self._INITIALIZE) # get global dynamic configuration config = ClusterConfig.from_node(metadata and metadata.resource_version, annotations.get(self._CONFIG) or '{}') # get timeline history history = TimelineHistory.from_node(metadata and metadata.resource_version, annotations.get(self._HISTORY) or '[]') leader = nodes.get(self.leader_path) metadata = leader and leader.metadata self._leader_resource_version = metadata.resource_version if metadata else None self._leader_observed_subsets = leader.subsets if self.__subsets and leader else [] annotations = metadata and metadata.annotations or {} # get last leader operation last_leader_operation = annotations.get(self._OPTIME) last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation) # get leader leader_record = {n: annotations.get(n) for n in (self._LEADER, 'acquireTime', 'ttl', 'renewTime', 'transitions') if n in annotations} if (leader_record or self._leader_observed_record) and leader_record != self._leader_observed_record: self._leader_observed_record = leader_record self._leader_observed_time = time.time() leader = leader_record.get(self._LEADER) try: ttl = int(leader_record.get('ttl')) or self._ttl except (TypeError, ValueError): ttl = self._ttl if not metadata or not self._leader_observed_time or self._leader_observed_time + ttl < time.time(): leader = None if metadata: member = Member(-1, leader, None, {}) member = ([m for m in members if m.name == leader] or [member])[0] leader = Leader(response.metadata.resource_version, None, member) # failover key failover = nodes.get(self.failover_path) metadata = failover and failover.metadata failover = Failover.from_node(metadata and metadata.resource_version, metadata and metadata.annotations) # get synchronization state sync = nodes.get(self.sync_path) metadata = sync and sync.metadata sync = SyncState.from_node(metadata and metadata.resource_version, metadata and metadata.annotations) self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history) except Exception: logger.exception('get_cluster') raise KubernetesError('Kubernetes API is not responding properly')