Example #1
0
 def test_manual_failover_from_leader(self):
     """Check ``run_cycle`` results for several failover keys while holding the lock.

     NOTE(review): the second and third ``Failover`` arguments are presumably
     the node to fail over from and the candidate -- confirm against the
     ``Failover`` definition.
     """
     self.ha.has_lock = true
     self.ha.cluster = get_cluster_initialized_with_leader(
         Failover(0, 'blabla', ''))
     self.assertEqual(self.ha.run_cycle(),
                      'no action.  i am the leader with the lock')
     self.ha.cluster = get_cluster_initialized_with_leader(
         Failover(0, '', MockPostgresql.name))
     self.assertEqual(self.ha.run_cycle(),
                      'no action.  i am the leader with the lock')
     self.ha.cluster = get_cluster_initialized_with_leader(
         Failover(0, '', 'blabla'))
     self.assertEqual(self.ha.run_cycle(),
                      'no action.  i am the leader with the lock')
     # a key whose second field is MockPostgresql.name makes the cycle report a demotion
     f = Failover(0, MockPostgresql.name, '')
     self.ha.cluster = get_cluster_initialized_with_leader(f)
     self.assertEqual(self.ha.run_cycle(),
                      'manual failover: demoting myself')
     # with the node status stub reporting nofailover, the same key yields no action
     self.ha.fetch_node_status = lambda e: (e, True, True, 0, {
         'nofailover': 'True'
     })
     self.assertEqual(self.ha.run_cycle(),
                      'no action.  i am the leader with the lock')
     # manual failover from the previous leader to us won't happen if we hold the nofailover flag
     self.ha.cluster = get_cluster_initialized_with_leader(
         Failover(0, 'blabla', MockPostgresql.name))
     self.assertEqual(self.ha.run_cycle(),
                      'no action.  i am the leader with the lock')
Example #2
0
    def test_manual_failover_from_leader(self):
        """Check ``run_cycle`` results for immediate and scheduled failover keys while holding the lock.

        NOTE(review): the ``Failover`` arguments are presumably
        (index, leader, candidate, scheduled time) -- confirm against the
        ``Failover`` definition.
        """
        self.ha.fetch_node_status = get_node_status()
        self.ha.has_lock = true
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', '', None))
        self.assertEqual(self.ha.run_cycle(),
                         'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', self.p.name, None))
        self.assertEqual(self.ha.run_cycle(),
                         'no action.  i am the leader with the lock')
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, '', 'blabla', None))
        self.assertEqual(self.ha.run_cycle(),
                         'no action.  i am the leader with the lock')
        # a key whose second field is our own name makes the cycle report a demotion
        f = Failover(0, self.p.name, '', None)
        self.ha.cluster = get_cluster_initialized_with_leader(f)
        self.assertEqual(self.ha.run_cycle(),
                         'manual failover: demoting myself')
        # the same key yields no action once the node status stub is changed
        self.ha.fetch_node_status = get_node_status(nofailover=True)
        self.assertEqual(self.ha.run_cycle(),
                         'no action.  i am the leader with the lock')
        self.ha.fetch_node_status = get_node_status(xlog_location=1)
        self.assertEqual(self.ha.run_cycle(),
                         'no action.  i am the leader with the lock')
        # manual failover from the previous leader to us won't happen if we hold the nofailover flag
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, None))
        self.assertEqual(self.ha.run_cycle(),
                         'no action.  i am the leader with the lock')

        # Failover scheduled time must include timezone
        # (a naive timestamp is used here; the result of the cycle is not asserted)
        scheduled = datetime.datetime.now()
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.ha.run_cycle()

        # timezone-aware timestamp taken from the current UTC time
        scheduled = datetime.datetime.utcnow().replace(tzinfo=tzutc)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEqual('no action.  i am the leader with the lock',
                         self.ha.run_cycle())

        # 30 seconds later than the timestamp above
        scheduled = scheduled + datetime.timedelta(seconds=30)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEqual('no action.  i am the leader with the lock',
                         self.ha.run_cycle())

        # 600 seconds earlier than the previous value, i.e. 570 seconds before the first timestamp
        scheduled = scheduled + datetime.timedelta(seconds=-600)
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEqual('no action.  i am the leader with the lock',
                         self.ha.run_cycle())

        # no scheduled time at all
        scheduled = None
        self.ha.cluster = get_cluster_initialized_with_leader(
            Failover(0, 'blabla', self.p.name, scheduled))
        self.assertEqual('no action.  i am the leader with the lock',
                         self.ha.run_cycle())
Example #3
0
 def test_manual_failover_from_leader_in_pause(self):
     """With ``is_paused`` and ``has_lock`` set, both failover keys below yield the PAUSE no-action result."""
     self.ha.has_lock = true
     self.ha.is_paused = true
     # key carrying a (naive) scheduled timestamp
     scheduled = datetime.datetime.now()
     self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, 'blabla', self.p.name, scheduled))
     self.assertEqual('PAUSE: no action.  i am the leader with the lock', self.ha.run_cycle())
     # key whose second field is our own name and which has no scheduled time
     self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, '', None))
     self.assertEqual('PAUSE: no action.  i am the leader with the lock', self.ha.run_cycle())
Example #4
0
 def test_manual_failover_from_leader_in_synchronous_mode(self):
     """Check ``run_cycle`` for a failover key to ``'a'`` with synchronous mode switched on.

     The first cycle runs with ``is_failover_possible`` stubbed to ``false`` and
     ``(self.p.name, None)`` as the second cluster argument; the second cycle
     uses ``(self.p.name, 'a')`` with ``is_failover_possible`` stubbed to ``true``.
     """
     self.p.is_leader = true
     self.ha.has_lock = true
     self.ha.is_synchronous_mode = true
     self.ha.is_failover_possible = false
     self.ha.process_sync_replication = Mock()
     self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, None))
     self.assertEqual('no action.  i am the leader with the lock', self.ha.run_cycle())
     self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, self.p.name, 'a', None), (self.p.name, 'a'))
     self.ha.is_failover_possible = true
     self.assertEqual('manual failover: demoting myself', self.ha.run_cycle())
Example #5
0
 def test_manual_failover_process_no_leader_in_pause(self):
     """Check ``run_cycle`` results for failover keys with ``is_paused`` set and a cluster built without a leader."""
     self.ha.is_paused = true
     self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, '', 'other', None))
     self.assertEqual(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
     self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', '', None))
     self.assertEqual(self.ha.run_cycle(), 'PAUSE: continue to run as master without lock')
     self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', 'blabla', None))
     self.assertEqual('PAUSE: acquired session lock as a leader', self.ha.run_cycle())
     # switch the postgres stub to a replica before the last scenario
     self.p.is_leader = false
     self.p.set_role('replica')
     self.ha.cluster = get_cluster_initialized_without_leader(failover=Failover(0, 'leader', self.p.name, None))
     self.assertEqual(self.ha.run_cycle(), 'PAUSE: promoted self to leader by acquiring session lock')
Example #6
0
    def _load_cluster(self):
        """Read the cluster subtree from etcd and store it in ``self._cluster``.

        The tree under ``self.client_path('')`` is read recursively (through
        ``self.retry``) and converted into a ``Cluster`` object. When the key
        does not exist (``etcd.EtcdKeyNotFound``) an empty ``Cluster`` is
        stored instead.

        :raises EtcdError: on any other exception, after logging it.
        """
        try:
            result = self.retry(self._client.read, self.client_path(''), recursive=True)
            # index the leaves by their key relative to the key that was read
            nodes = {os.path.relpath(node.key, result.key): node for node in result.leaves}

            # get initialize flag
            initialize = nodes.get(self._INITIALIZE)
            initialize = initialize and initialize.value

            # get last leader operation
            last_leader_operation = nodes.get(self._LEADER_OPTIME)
            last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation.value)

            # get list of members
            members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1]

            # get leader
            leader = nodes.get(self._LEADER)
            if leader:
                # fall back to a placeholder member (index -1) when the leader is not in the member list
                member = Member(-1, leader.value, None, {})
                member = ([m for m in members if m.name == leader.value] or [member])[0]
                leader = Leader(leader.modifiedIndex, leader.ttl, member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover.modifiedIndex, failover.value)

            self._cluster = Cluster(initialize, leader, last_leader_operation, members, failover)
        except etcd.EtcdKeyNotFound:
            self._cluster = Cluster(False, None, None, [], None)
        except Exception:
            # not a bare ``except`` so that KeyboardInterrupt/SystemExit are not turned into EtcdError
            logger.exception('get_cluster')
            raise EtcdError('Etcd is not responding properly')
    def _load_cluster(self):
        """Read the cluster subtree from the Consul KV store and store it in ``self._cluster``.

        All keys under ``self.client_path('/')`` are fetched recursively and
        converted into a ``Cluster`` object. When the read returns no results
        an empty ``Cluster`` is stored instead.

        If the leader key names this node but carries a different session than
        ``self._session``, the leader key is deleted (compare-and-set on its
        ``ModifyIndex``) and the cluster is built without a leader.

        :raises ConsulError: on any other exception, after logging it.
        """
        try:
            path = self.client_path('/')
            _, results = self._client.kv.get(path, recurse=True)

            if results is None:
                raise NotFound

            nodes = {}
            for node in results:
                node['Value'] = (node['Value'] or b'').decode('utf-8')
                # strip the prefix by slicing: os.path.relpath depends on the
                # OS path separator and on the current working directory
                nodes[node['Key'][len(path):].lstrip('/')] = node

            # get initialize flag
            initialize = nodes.get(self._INITIALIZE)
            initialize = initialize and initialize['Value']

            # get last leader operation
            last_leader_operation = nodes.get(self._LEADER_OPTIME)
            last_leader_operation = 0 if last_leader_operation is None else int(
                last_leader_operation['Value'])

            # get list of members
            members = [
                self.member(n) for k, n in nodes.items()
                if k.startswith(self._MEMBERS) and k.count('/') == 1
            ]

            # get leader
            leader = nodes.get(self._LEADER)
            if leader and leader[
                    'Value'] == self._name and self._session != leader.get(
                        'Session', 'x'):
                logger.info(
                    'I am leader but not owner of the session. Removing leader node'
                )
                self._client.kv.delete(self.leader_path,
                                       cas=leader['ModifyIndex'])
                leader = None

            if leader:
                # fall back to a placeholder member (index -1) when the leader is not in the member list
                member = Member(-1, leader['Value'], None, {})
                member = ([m for m in members if m.name == leader['Value']]
                          or [member])[0]
                leader = Leader(leader['ModifyIndex'], leader.get('Session'),
                                member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover['ModifyIndex'],
                                              failover['Value'])

            self._cluster = Cluster(initialize, leader, last_leader_operation,
                                    members, failover)
        except NotFound:
            self._cluster = Cluster(False, None, None, [], None)
        except Exception:
            # not a bare ``except`` so that KeyboardInterrupt/SystemExit are not turned into ConsulError
            logger.exception('get_cluster')
            raise ConsulError('Consul is not responding properly')
Example #8
0
    def _load_cluster(self):
        """Read the cluster subtree from the Consul KV store and store it in ``self._cluster``.

        All keys under ``self.client_path('/')`` are fetched recursively
        (through ``self.retry``) and converted into a ``Cluster`` object. When
        the read returns no results an empty ``Cluster`` is stored instead.

        Unless ``self._ctl`` is set, a leader key that names this node but
        carries a different session than ``self._session`` is deleted
        (compare-and-set on its ``ModifyIndex``) and the cluster is built
        without a leader.

        :raises ConsulError: on any other exception, after logging it.
        """
        try:
            path = self.client_path('/')
            _, results = self.retry(self._client.kv.get, path, recurse=True)

            if results is None:
                raise NotFound

            nodes = {}
            for node in results:
                node['Value'] = (node['Value'] or b'').decode('utf-8')
                # strip the prefix by slicing: os.path.relpath depends on the
                # OS path separator and on the current working directory
                nodes[node['Key'][len(path):].lstrip('/')] = node

            # get initialize flag
            initialize = nodes.get(self._INITIALIZE)
            initialize = initialize and initialize['Value']

            # get global dynamic configuration
            config = nodes.get(self._CONFIG)
            config = config and ClusterConfig.from_node(config['ModifyIndex'], config['Value'])

            # get timeline history
            history = nodes.get(self._HISTORY)
            history = history and TimelineHistory.from_node(history['ModifyIndex'], history['Value'])

            # get last leader operation
            last_leader_operation = nodes.get(self._LEADER_OPTIME)
            last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation['Value'])

            # get list of members
            members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1]

            # get leader
            leader = nodes.get(self._LEADER)
            if not self._ctl and leader and leader['Value'] == self._name \
                    and self._session != leader.get('Session', 'x'):
                logger.info('I am leader but not owner of the session. Removing leader node')
                self._client.kv.delete(self.leader_path, cas=leader['ModifyIndex'])
                leader = None

            if leader:
                # fall back to a placeholder member (index -1) when the leader is not in the member list
                member = Member(-1, leader['Value'], None, {})
                member = ([m for m in members if m.name == leader['Value']] or [member])[0]
                leader = Leader(leader['ModifyIndex'], leader.get('Session'), member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover['ModifyIndex'], failover['Value'])

            # get synchronization state (from_node is called even when the key is absent)
            sync = nodes.get(self._SYNC)
            sync = SyncState.from_node(sync and sync['ModifyIndex'], sync and sync['Value'])

            self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history)
        except NotFound:
            self._cluster = Cluster(None, None, None, None, [], None, None, None)
        except Exception:
            logger.exception('get_cluster')
            raise ConsulError('Consul is not responding properly')
Example #9
0
    def _load_cluster(self):
        """Rebuild ``self._cluster`` from a recursive read of the Consul KV prefix.

        An empty ``Cluster`` is stored when the read yields no results; any
        other exception is logged and re-raised as ``ConsulError``.
        """
        try:
            prefix = self.client_path('/')
            _, entries = self.retry(self._client.kv.get, prefix, recurse=True)

            if entries is None:
                raise NotFound

            # decode values in place and index the entries by their key below the prefix
            nodes = {}
            for entry in entries:
                entry['Value'] = (entry['Value'] or b'').decode('utf-8')
                relative_key = entry['Key'][len(prefix):].lstrip('/')
                nodes[relative_key] = entry

            # initialize flag
            init_node = nodes.get(self._INITIALIZE)
            initialize = init_node['Value'] if init_node else init_node

            # global dynamic configuration
            config = nodes.get(self._CONFIG)
            if config:
                config = ClusterConfig.from_node(config['ModifyIndex'], config['Value'])

            # timeline history
            history = nodes.get(self._HISTORY)
            if history:
                history = TimelineHistory.from_node(history['ModifyIndex'], history['Value'])

            # last leader operation
            optime_node = nodes.get(self._LEADER_OPTIME)
            if optime_node is None:
                last_leader_operation = 0
            else:
                last_leader_operation = int(optime_node['Value'])

            # members: keys that start with the members prefix and contain exactly one '/'
            members = []
            for key, entry in nodes.items():
                if key.startswith(self._MEMBERS) and key.count('/') == 1:
                    members.append(self.member(entry))

            # leader: drop the key when it names us but belongs to another session
            leader = nodes.get(self._LEADER)
            foreign_session = (
                not self._ctl
                and leader
                and leader['Value'] == self._name
                and self._session != leader.get('Session', 'x')
            )
            if foreign_session:
                logger.info('I am leader but not owner of the session. Removing leader node')
                self._client.kv.delete(self.leader_path, cas=leader['ModifyIndex'])
                leader = None

            if leader:
                placeholder = Member(-1, leader['Value'], None, {})
                matching = [m for m in members if m.name == leader['Value']]
                member = matching[0] if matching else placeholder
                leader = Leader(leader['ModifyIndex'], leader.get('Session'), member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover['ModifyIndex'], failover['Value'])

            # synchronization state (from_node is called even when the key is absent)
            sync_node = nodes.get(self._SYNC)
            if sync_node:
                sync = SyncState.from_node(sync_node['ModifyIndex'], sync_node['Value'])
            else:
                sync = SyncState.from_node(sync_node, sync_node)

            self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history)
        except NotFound:
            self._cluster = Cluster(None, None, None, None, [], None, None, None)
        except Exception:
            logger.exception('get_cluster')
            raise ConsulError('Consul is not responding properly')
Example #10
0
 def test_manual_failover_while_starting(self):
     """With ``check_for_startup`` stubbed to ``true``, a failover key naming us still reports a demotion."""
     self.ha.has_lock = true
     self.p.check_for_startup = true
     f = Failover(0, self.p.name, '', None)
     self.ha.cluster = get_cluster_initialized_with_leader(f)
     self.ha.fetch_node_status = get_node_status()  # accessible, in_recovery
     self.assertEqual(self.ha.run_cycle(), 'manual failover: demoting myself')
Example #11
0
    def _load_cluster(self):
        """Read the cluster subtree from etcd and store it in ``self._cluster``.

        The tree under ``self.client_path('')`` is read recursively (through
        ``self.retry``) and converted into a ``Cluster`` object. When the key
        does not exist (``etcd.EtcdKeyNotFound``) an empty ``Cluster`` is
        stored instead.

        :raises EtcdError: on any other exception, after logging it.
        """
        try:
            result = self.retry(self.client.read, self.client_path(''), recursive=True)
            # index the leaves by their key relative to the key that was read
            nodes = {os.path.relpath(node.key, result.key): node for node in result.leaves}

            # get initialize flag
            initialize = nodes.get(self._INITIALIZE)
            initialize = initialize and initialize.value

            # get last leader operation
            last_leader_operation = nodes.get(self._LEADER_OPTIME)
            last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation.value)

            # get list of members
            members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1]

            # get leader
            leader = nodes.get(self._LEADER)
            if leader:
                # fall back to a placeholder member (index -1) when the leader is not in the member list
                member = Member(-1, leader.value, None, {})
                member = ([m for m in members if m.name == leader.value] or [member])[0]
                leader = Leader(leader.modifiedIndex, leader.ttl, member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover.modifiedIndex, failover.value)

            self._cluster = Cluster(initialize, leader, last_leader_operation, members, failover)
        except etcd.EtcdKeyNotFound:
            self._cluster = Cluster(False, None, None, [], None)
        except Exception:
            # not a bare ``except`` so that KeyboardInterrupt/SystemExit are not turned into EtcdError
            logger.exception('get_cluster')
            raise EtcdError('Etcd is not responding properly')
Example #12
0
 def test_process_healthy_cluster_in_pause(self):
     """Check ``run_cycle`` results with ``is_paused`` set, our name set to ``'leader'`` and ``is_leader`` stubbed to ``false``."""
     self.p.is_leader = false
     self.ha.is_paused = true
     self.p.name = 'leader'
     self.ha.cluster = get_cluster_initialized_with_leader()
     self.assertEqual(self.ha.run_cycle(), 'PAUSE: removed leader lock because postgres is not running as master')
     # same situation, but with a failover key whose third field is our own name
     self.ha.cluster = get_cluster_initialized_with_leader(Failover(0, '', self.p.name, None))
     self.assertEqual(self.ha.run_cycle(), 'PAUSE: waiting to become master after promote...')
Example #13
0
    def _load_cluster(self):
        """Read the cluster subtree from etcd and store it in ``self._cluster``.

        The tree under ``self.client_path('')`` is read recursively (through
        ``self.retry``) and converted into a ``Cluster`` object. When the key
        does not exist (``etcd.EtcdKeyNotFound``) an empty ``Cluster`` is
        stored instead.

        :raises EtcdError: on any other exception, after logging it.
        """
        try:
            result = self.retry(self._client.read,
                                self.client_path(''),
                                recursive=True)
            # index the leaves by their key relative to the key that was read
            nodes = {
                os.path.relpath(node.key, result.key): node
                for node in result.leaves
            }

            # get initialize flag
            initialize = nodes.get(self._INITIALIZE)
            initialize = initialize and initialize.value

            # get global dynamic configuration
            config = nodes.get(self._CONFIG)
            config = config and ClusterConfig.from_node(
                config.modifiedIndex, config.value)

            # get last leader operation
            last_leader_operation = nodes.get(self._LEADER_OPTIME)
            last_leader_operation = 0 if last_leader_operation is None else int(
                last_leader_operation.value)

            # get list of members
            members = [
                self.member(n) for k, n in nodes.items()
                if k.startswith(self._MEMBERS) and k.count('/') == 1
            ]

            # get leader
            leader = nodes.get(self._LEADER)
            if leader:
                # fall back to a placeholder member (index -1) when the leader is not in the member list
                member = Member(-1, leader.value, None, {})
                member = ([m for m in members if m.name == leader.value]
                          or [member])[0]
                # use the response's etcd_index when it is ahead of the leader
                # key's modifiedIndex, otherwise modifiedIndex + 1
                index = result.etcd_index if result.etcd_index > leader.modifiedIndex else leader.modifiedIndex + 1
                leader = Leader(index, leader.ttl, member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover.modifiedIndex,
                                              failover.value)

            # get synchronization state (from_node is called even when the key is absent)
            sync = nodes.get(self._SYNC)
            sync = SyncState.from_node(sync and sync.modifiedIndex, sync
                                       and sync.value)

            self._cluster = Cluster(initialize, config, leader,
                                    last_leader_operation, members, failover,
                                    sync)
        except etcd.EtcdKeyNotFound:
            self._cluster = Cluster(None, None, None, None, [], None, None)
        except Exception:
            # not a bare ``except`` so that KeyboardInterrupt/SystemExit are not turned into EtcdError
            logger.exception('get_cluster')
            raise EtcdError('Etcd is not responding properly')
Example #14
0
    def _inner_load_cluster(self):
        """Read the individual cluster nodes and store the resulting ``Cluster`` in ``self._cluster``.

        ``self._fetch_cluster`` is reset at the start and set again when no
        children are found or when the leader is not among the loaded members.
        """
        self._fetch_cluster = False
        self.event.clear()
        children = set(self.get_children(self.client_path(''), self.cluster_watcher))
        if not children:
            self._fetch_cluster = True

        # initialize flag
        initialize = None
        if self._INITIALIZE in children:
            initialize = (self.get_node(self.initialize_path) or [None])[0]

        # global dynamic configuration
        config = None
        if self._CONFIG in children:
            config = self.get_node(self.config_path, watch=self.cluster_watcher)
        if config:
            config = ClusterConfig.from_node(config[1].version, config[0], config[1].mzxid)

        # last leader operation
        # NOTE(review): within this method ``_fetch_cluster`` is only True at
        # this point when ``children`` is empty, in which case the membership
        # test fails; unless the flag is changed elsewhere in the meantime the
        # read below is never performed -- confirm this is intended.
        last_leader_operation = 0
        if self._OPTIME in children and self._fetch_cluster:
            optime = self.get_node(self.leader_optime_path)
            if optime:
                last_leader_operation = int(optime[0])

        # list of members
        if self._MEMBERS[:-1] in children:
            members = self.load_members()
        else:
            members = []

        # leader
        leader = None
        if self._LEADER in children:
            leader = self.get_node(self.leader_path)
        if leader:
            client_id = self._client.client_id
            foreign_owner = (
                not self._ctl
                and leader[0] == self._name
                and client_id is not None
                and client_id[0] != leader[1].ephemeralOwner
            )
            if foreign_owner:
                logger.info('I am leader but not owner of the session. Removing leader node')
                self._client.delete(self.leader_path)
                leader = None
            else:
                placeholder = Member(-1, leader[0], None, {})
                matching = [m for m in members if m.name == leader[0]]
                member = matching[0] if matching else placeholder
                leader = Leader(leader[1].version, leader[1].ephemeralOwner, member)
                self._fetch_cluster = member.index == -1

        # failover key
        failover = None
        if self._FAILOVER in children:
            failover = self.get_node(self.failover_path, watch=self.cluster_watcher)
        if failover:
            failover = Failover.from_node(failover[1].version, failover[0])

        self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover)
Example #15
0
 def test_manual_failover_process_no_leader(self):
     """Check ``run_cycle`` results for failover keys on clusters built without a leader."""
     self.p.is_leader = false
     self.ha.cluster = get_cluster_initialized_without_leader(
         failover=Failover(0, '', self.p.name, None))
     self.assertEqual(self.ha.run_cycle(),
                      'promoted self to leader by acquiring session lock')
     self.ha.cluster = get_cluster_initialized_without_leader(
         failover=Failover(0, '', 'leader', None))
     self.p.set_role('replica')
     self.assertEqual(self.ha.run_cycle(),
                      'promoted self to leader by acquiring session lock')
     # accessible, in_recovery
     self.ha.fetch_node_status = lambda e: (e, True, True, 0, {})
     self.assertEqual(
         self.ha.run_cycle(),
         'following a different leader because i am not the healthiest node'
     )
     self.ha.cluster = get_cluster_initialized_without_leader(
         failover=Failover(0, self.p.name, '', None))
     self.assertEqual(
         self.ha.run_cycle(),
         'following a different leader because i am not the healthiest node'
     )
     # inaccessible, in_recovery
     self.ha.fetch_node_status = lambda e: (e, False, True, 0, {})
     self.p.set_role('replica')
     self.assertEqual(self.ha.run_cycle(),
                      'promoted self to leader by acquiring session lock')
     # set nofailover flag to True for all members of the cluster
     # this should elect the current member, as we are not going to call the API for it.
     self.ha.cluster = get_cluster_initialized_without_leader(
         failover=Failover(0, '', 'other', None))
     # accessible, in_recovery
     self.ha.fetch_node_status = lambda e: (e, True, True, 0, {
         'nofailover': 'True'
     })
     self.p.set_role('replica')
     self.assertEqual(self.ha.run_cycle(),
                      'promoted self to leader by acquiring session lock')
     # same as previous, but set the current member to nofailover. In no case it should be elected as a leader
     self.ha.patroni.nofailover = True
     self.assertEqual(
         self.ha.run_cycle(),
         'following a different leader because I am not allowed to promote')
Example #16
0
    def _load_cluster(self):
        """Build and return a ``Cluster`` from a recursive read of ``self._sync_obj``.

        An empty ``Cluster`` is returned when the read yields nothing.
        """
        base = self.client_path('')
        response = self._sync_obj.get(base, recursive=True)
        if not response:
            return Cluster(None, None, None, None, [], None, None, None)

        # index the entries by their '/'-separated path relative to the base
        nodes = {}
        for key, value in response.items():
            relative_key = os.path.relpath(key, base).replace('\\', '/')
            nodes[relative_key] = value

        # initialize flag
        init_node = nodes.get(self._INITIALIZE)
        initialize = init_node['value'] if init_node else init_node

        # global dynamic configuration
        config = nodes.get(self._CONFIG)
        if config:
            config = ClusterConfig.from_node(config['index'], config['value'])

        # timeline history
        history = nodes.get(self._HISTORY)
        if history:
            history = TimelineHistory.from_node(history['index'], history['value'])

        # last leader operation
        optime_node = nodes.get(self._LEADER_OPTIME)
        if optime_node is None:
            last_leader_operation = 0
        else:
            last_leader_operation = int(optime_node['value'])

        # members: keys that start with the members prefix and contain exactly one '/'
        members = []
        for key, node in nodes.items():
            if key.startswith(self._MEMBERS) and key.count('/') == 1:
                members.append(self.member(key, node))

        # leader
        leader = nodes.get(self._LEADER)
        if leader:
            placeholder = Member(-1, leader['value'], None, {})
            matching = [m for m in members if m.name == leader['value']]
            member = matching[0] if matching else placeholder
            leader = Leader(leader['index'], None, member)

        # failover key
        failover = nodes.get(self._FAILOVER)
        if failover:
            failover = Failover.from_node(failover['index'], failover['value'])

        # synchronization state (from_node is called even when the key is absent)
        sync_node = nodes.get(self._SYNC)
        if sync_node:
            sync = SyncState.from_node(sync_node['index'], sync_node['value'])
        else:
            sync = SyncState.from_node(sync_node, sync_node)

        return Cluster(initialize, config, leader, last_leader_operation,
                       members, failover, sync, history)
Example #17
0
    def _inner_load_cluster(self):
        """Read the individual cluster nodes and return the resulting ``Cluster``.

        ``self._fetch_cluster`` is reset at the start and set again when no
        children are found or when the leader is not among the loaded members.
        """
        self._fetch_cluster = False
        self.event.clear()
        children = set(self.get_children(self.client_path(''), self.cluster_watcher))
        if not children:
            self._fetch_cluster = True

        # initialize flag
        initialize = None
        if self._INITIALIZE in children:
            initialize = (self.get_node(self.initialize_path) or [None])[0]

        # global dynamic configuration
        config = None
        if self._CONFIG in children:
            config = self.get_node(self.config_path, watch=self.cluster_watcher)
        if config:
            config = ClusterConfig.from_node(config[1].version, config[0], config[1].mzxid)

        # timeline history
        history = None
        if self._HISTORY in children:
            history = self.get_node(self.history_path, watch=self.cluster_watcher)
        if history:
            history = TimelineHistory.from_node(history[1].mzxid, history[0])

        # synchronization state (from_node is called even when the node is absent)
        sync_node = None
        if self._SYNC in children:
            sync_node = self.get_node(self.sync_path, watch=self.cluster_watcher)
        if sync_node:
            sync = SyncState.from_node(sync_node[1].version, sync_node[0])
        else:
            sync = SyncState.from_node(sync_node, sync_node)

        # list of members; the sync members are passed on only when the sync leader is this node
        sync_standby = []
        if sync.leader == self._name:
            sync_standby = sync.members or []
        if self._MEMBERS[:-1] in children:
            members = self.load_members(sync_standby)
        else:
            members = []

        # leader
        leader = None
        if self._LEADER in children:
            leader = self.get_node(self.leader_path)
        if leader:
            client_id = self._client.client_id
            foreign_owner = (
                not self._ctl
                and leader[0] == self._name
                and client_id is not None
                and client_id[0] != leader[1].ephemeralOwner
            )
            if foreign_owner:
                logger.info('I am leader but not owner of the session. Removing leader node')
                self._client.delete(self.leader_path)
                leader = None
            else:
                placeholder = Member(-1, leader[0], None, {})
                matching = [m for m in members if m.name == leader[0]]
                member = matching[0] if matching else placeholder
                leader = Leader(leader[1].version, leader[1].ephemeralOwner, member)
                self._fetch_cluster = member.index == -1

        # last leader operation (stays False when the optime node is not among the children)
        if self._OPTIME in children:
            last_leader_operation = self.get_leader_optime(leader)
        else:
            last_leader_operation = False

        # failover key
        failover = None
        if self._FAILOVER in children:
            failover = self.get_node(self.failover_path, watch=self.cluster_watcher)
        if failover:
            failover = Failover.from_node(failover[1].version, failover[0])

        return Cluster(initialize, config, leader, last_leader_operation, members, failover, sync, history)
    def _inner_load_cluster(self):
        """Read the individual cluster nodes and store the resulting ``Cluster`` in ``self._cluster``.

        ``self._fetch_cluster`` is reset at the start and set again when no
        children are found or when the leader is not among the loaded members.
        The last leader operation is also kept in ``self._last_leader_operation``.
        """
        self._fetch_cluster = False
        self.event.clear()
        children = set(self.get_children(self.client_path(''), self.cluster_watcher))
        if not children:
            self._fetch_cluster = True

        # initialize flag
        initialize = None
        if self._INITIALIZE in children:
            initialize = (self.get_node(self.initialize_path) or [None])[0]

        # list of members
        if self._MEMBERS[:-1] in children:
            members = self.load_members()
        else:
            members = []

        # leader
        leader = None
        if self._LEADER in children:
            leader = self.get_node(self.leader_path)
        if leader:
            client_id = self._client.client_id
            foreign_owner = (
                leader[0] == self._name
                and client_id is not None
                and client_id[0] != leader[1].ephemeralOwner
            )
            if foreign_owner:
                logger.info('I am leader but not owner of the session. Removing leader node')
                self._client.delete(self.leader_path)
                leader = None
            else:
                placeholder = Member(-1, leader[0], None, {})
                matching = [m for m in members if m.name == leader[0]]
                member = matching[0] if matching else placeholder
                leader = Leader(leader[1].version, leader[1].ephemeralOwner, member)
                self._fetch_cluster = member.index == -1

        # failover key
        failover = None
        if self._FAILOVER in children:
            failover = self.get_node(self.failover_path, watch=self.cluster_watcher)
        if failover:
            failover = Failover.from_node(failover[1].version, failover[0])

        # last leader operation: only read when the node exists and a refetch is flagged
        optime = None
        if self._OPTIME in children and self._fetch_cluster:
            optime = self.get_node(self.leader_optime_path)
        if optime is None:
            self._last_leader_operation = 0
        else:
            self._last_leader_operation = int(optime[0])

        self._cluster = Cluster(initialize, leader, self._last_leader_operation, members, failover)
    def _load_cluster(self):
        """Load the cluster state from the KV store into ``self._cluster``.

        Performs a single recursive read below ``self.client_path('/')`` and
        derives the initialize flag, last leader operation, members, leader and
        failover objects from the returned nodes.  A leader key that carries
        our name but a session different from ``self._session`` is deleted
        (passing its ``ModifyIndex`` as ``cas``) and treated as absent.

        When the read returns no results an empty ``Cluster`` is stored.

        Raises:
            ConsulError: on any other ordinary failure; the original error is
                logged first.
        """
        try:
            path = self.client_path('/')
            _, results = self._client.kv.get(path, recurse=True)

            if results is None:
                raise NotFound

            # index the nodes by their key relative to the cluster path
            nodes = {}
            for node in results:
                node['Value'] = (node['Value'] or b'').decode('utf-8')
                nodes[os.path.relpath(node['Key'], path)] = node

            # get initialize flag
            initialize = nodes.get(self._INITIALIZE)
            initialize = initialize and initialize['Value']

            # get last leader operation
            last_leader_operation = nodes.get(self._LEADER_OPTIME)
            last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation['Value'])

            # get list of members
            members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1]

            # get leader
            leader = nodes.get(self._LEADER)
            if leader and leader['Value'] == self._name and self._session != leader.get('Session', 'x'):
                logger.info('I am leader but not owner of the session. Removing leader node')
                self._client.kv.delete(self.leader_path, cas=leader['ModifyIndex'])
                leader = None

            if leader:
                # fall back to a placeholder member (index -1) when the leader
                # is not among the loaded members
                member = Member(-1, leader['Value'], None, {})
                member = ([m for m in members if m.name == leader['Value']] or [member])[0]
                leader = Leader(leader['ModifyIndex'], leader.get('Session'), member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover['ModifyIndex'], failover['Value'])

            self._cluster = Cluster(initialize, leader, last_leader_operation, members, failover)
        except NotFound:
            self._cluster = Cluster(False, None, None, [], None)
        except Exception:
            # Not a bare ``except:``: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as a Consul failure.
            logger.exception('get_cluster')
            raise ConsulError('Consul is not responding properly')
Example #20
0
    def _load_cluster(self):
        """Load the cluster state from the store into ``self._cluster``.

        Performs one recursive read of ``self.client_path('')`` through
        ``self.retry`` and derives the initialize flag, dynamic configuration,
        last leader operation, members, leader, failover and synchronization
        state from the returned leaves.

        When the key is not found an empty ``Cluster`` is stored.

        Raises:
            EtcdError: on any other ordinary failure; the original error is
                logged first.
        """
        try:
            result = self.retry(self._client.read, self.client_path(''), recursive=True)
            # index the leaves by their key relative to the key that was read
            nodes = {os.path.relpath(node.key, result.key): node for node in result.leaves}

            # get initialize flag
            initialize = nodes.get(self._INITIALIZE)
            initialize = initialize and initialize.value

            # get global dynamic configuration
            config = nodes.get(self._CONFIG)
            config = config and ClusterConfig.from_node(config.modifiedIndex, config.value)

            # get last leader operation
            last_leader_operation = nodes.get(self._LEADER_OPTIME)
            last_leader_operation = 0 if last_leader_operation is None else int(last_leader_operation.value)

            # get list of members
            members = [self.member(n) for k, n in nodes.items() if k.startswith(self._MEMBERS) and k.count('/') == 1]

            # get leader
            leader = nodes.get(self._LEADER)
            if leader:
                # fall back to a placeholder member (index -1) when the leader
                # is not among the loaded members
                member = Member(-1, leader.value, None, {})
                member = ([m for m in members if m.name == leader.value] or [member])[0]
                # use the response's etcd_index when it is ahead of the leader
                # key, otherwise one past the key's modifiedIndex
                index = result.etcd_index if result.etcd_index > leader.modifiedIndex else leader.modifiedIndex + 1
                leader = Leader(index, leader.ttl, member)

            # failover key
            failover = nodes.get(self._FAILOVER)
            if failover:
                failover = Failover.from_node(failover.modifiedIndex, failover.value)

            # get synchronization state
            sync = nodes.get(self._SYNC)
            sync = SyncState.from_node(sync and sync.modifiedIndex, sync and sync.value)

            self._cluster = Cluster(initialize, config, leader, last_leader_operation, members, failover, sync)
        except etcd.EtcdKeyNotFound:
            self._cluster = Cluster(None, None, None, None, [], None, None)
        except Exception:
            # Not a bare ``except:``: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as an Etcd failure.
            logger.exception('get_cluster')
            raise EtcdError('Etcd is not responding properly')
Example #21
0
    def _inner_load_cluster(self):
        """Re-read the cluster state and cache it in ``self._cluster``.

        Lists the children of the cluster root path (passing
        ``self.cluster_watcher`` along), then fetches the initialize flag, the
        dynamic configuration, the members, the leader, the failover key and
        -- only while ``self._fetch_cluster`` is set -- the last leader
        operation.

        Side effects: clears ``self.event``, resets and possibly re-raises
        ``self._fetch_cluster``, may delete a leader node that carries our
        name but is owned by a different session, and updates
        ``self._last_leader_operation`` and ``self._cluster``.
        """
        self._fetch_cluster = False
        self.event.clear()
        children = self.get_children(self.client_path(''), self.cluster_watcher)
        nodes = set(children)
        if not nodes:
            self._fetch_cluster = True

        # get initialize flag
        initialize = None
        if self._INITIALIZE in nodes:
            init_node = self.get_node(self.initialize_path)
            if init_node:
                initialize = init_node[0]

        # get global dynamic configuration
        config = None
        if self._CONFIG in nodes:
            config = self.get_node(self.config_path, watch=self.cluster_watcher)
        if config:
            config_value, config_stat = config[0], config[1]
            config = ClusterConfig.from_node(config_stat.version, config_value, config_stat.mzxid)

        # get list of members
        members = []
        if self._MEMBERS[:-1] in nodes:
            members = self.load_members()

        # get leader
        leader = None
        if self._LEADER in nodes:
            leader = self.get_node(self.leader_path)

        if leader:
            client_id = self._client.client_id
            owner_name, owner_stat = leader[0], leader[1]
            if (owner_name == self._name and client_id is not None
                    and client_id[0] != owner_stat.ephemeralOwner):
                logger.info('I am leader but not owner of the session. Removing leader node')
                self._client.delete(self.leader_path)
                leader = None
            else:
                # fall back to a placeholder member (index -1) when the leader
                # is not among the loaded members
                candidates = [m for m in members if m.name == owner_name]
                member = candidates[0] if candidates else Member(-1, owner_name, None, {})
                leader = Leader(owner_stat.version, owner_stat.ephemeralOwner, member)
                self._fetch_cluster = member.index == -1

        # failover key
        failover = None
        if self._FAILOVER in nodes:
            failover = self.get_node(self.failover_path, watch=self.cluster_watcher)
        if failover:
            failover = Failover.from_node(failover[1].version, failover[0])

        # get last leader operation
        optime = None
        if self._OPTIME in nodes and self._fetch_cluster:
            optime = self.get_node(self.leader_optime_path)
        self._last_leader_operation = int(optime[0]) if optime is not None else 0

        self._cluster = Cluster(initialize, config, leader, self._last_leader_operation, members, failover)
Example #22
0
    def _load_cluster(self):
        """Rebuild ``self._cluster`` from the API objects matching the label selector.

        Lists the pods (turned into members) and the objects returned by
        ``list_namespaced_kind``, then derives the initialize flag, dynamic
        configuration, timeline history, last leader operation, leader,
        failover and synchronization state from their metadata/annotations.

        Side effects: updates ``self._leader_resource_version``,
        ``self._leader_observed_subsets`` and, when the leader record changed,
        ``self._leader_observed_record`` / ``self._leader_observed_time``.

        Raises:
            KubernetesError: if any step fails; the original error is logged.
        """
        try:

            def version_and_annotations(obj):
                # (resource_version, annotations) of obj's metadata; falsy
                # placeholders when obj or its metadata is missing
                meta = obj and obj.metadata
                return meta and meta.resource_version, meta and meta.annotations

            # get list of members
            pods = self.retry(self._api.list_namespaced_pod, self._namespace,
                              label_selector=self._label_selector)
            members = [self.member(pod) for pod in pods.items]

            kinds = self.retry(self._api.list_namespaced_kind, self._namespace,
                               label_selector=self._label_selector)
            nodes = {}
            for item in kinds.items:
                nodes[item.metadata.name] = item

            config_version, config_annotations = version_and_annotations(nodes.get(self.config_path))
            config_annotations = config_annotations or {}

            # get initialize flag
            initialize = config_annotations.get(self._INITIALIZE)

            # get global dynamic configuration
            config = ClusterConfig.from_node(config_version, config_annotations.get(self._CONFIG) or '{}')

            # get timeline history
            history = TimelineHistory.from_node(config_version, config_annotations.get(self._HISTORY) or '[]')

            leader_obj = nodes.get(self.leader_path)
            leader_meta = leader_obj and leader_obj.metadata
            if leader_meta:
                self._leader_resource_version = leader_meta.resource_version
            else:
                self._leader_resource_version = None
            if self.__subsets and leader_obj:
                self._leader_observed_subsets = leader_obj.subsets
            else:
                self._leader_observed_subsets = []
            leader_annotations = leader_meta and leader_meta.annotations or {}

            # get last leader operation
            optime = leader_annotations.get(self._OPTIME)
            last_leader_operation = int(optime) if optime is not None else 0

            # get leader
            leader_record = {}
            for key in (self._LEADER, 'acquireTime', 'ttl', 'renewTime', 'transitions'):
                if key in leader_annotations:
                    leader_record[key] = leader_annotations.get(key)

            # remember when a different leader record was first observed
            previous_record = self._leader_observed_record
            if (leader_record or previous_record) and leader_record != previous_record:
                self._leader_observed_record = leader_record
                self._leader_observed_time = time.time()

            leader_name = leader_record.get(self._LEADER)
            try:
                ttl = int(leader_record.get('ttl'))
            except (TypeError, ValueError):
                ttl = 0
            ttl = ttl or self._ttl

            # no leader object, nothing observed yet, or the record is older than ttl
            if not leader_meta or not self._leader_observed_time \
                    or self._leader_observed_time + ttl < time.time():
                leader_name = None

            leader = leader_name
            if leader_meta:
                # fall back to a placeholder member (index -1) when the leader
                # is not among the listed pods
                placeholder = Member(-1, leader_name, None, {})
                member = next((m for m in members if m.name == leader_name), placeholder)
                leader = Leader(kinds.metadata.resource_version, None, member)

            # failover key
            failover = Failover.from_node(*version_and_annotations(nodes.get(self.failover_path)))

            # get synchronization state
            sync = SyncState.from_node(*version_and_annotations(nodes.get(self.sync_path)))

            self._cluster = Cluster(initialize, config, leader, last_leader_operation,
                                    members, failover, sync, history)
        except Exception:
            logger.exception('get_cluster')
            raise KubernetesError('Kubernetes API is not responding properly')
Example #23
0
    def _load_cluster(self):
        """Rebuild ``self._cluster`` from the API objects matching the label selector.

        Lists the pods (turned into members) and the objects returned by
        ``list_namespaced_kind``, then derives the initialize flag, dynamic
        configuration, timeline history, last leader operation, leader,
        failover and synchronization state from their metadata/annotations.

        Side effects: updates ``self._leader_resource_version``,
        ``self._leader_observed_subsets`` and, when the leader record changed,
        ``self._leader_observed_record`` / ``self._leader_observed_time``.

        Raises:
            KubernetesError: if any step fails; the original error is logged.
        """
        try:
            # get list of members
            pod_list = self.retry(self._api.list_namespaced_pod, self._namespace,
                                  label_selector=self._label_selector)
            members = [self.member(pod) for pod in pod_list.items]

            kind_list = self.retry(self._api.list_namespaced_kind, self._namespace,
                                   label_selector=self._label_selector)
            nodes = {obj.metadata.name: obj for obj in kind_list.items}

            cfg = nodes.get(self.config_path)
            cfg_meta = cfg.metadata if cfg else cfg
            cfg_version = cfg_meta.resource_version if cfg_meta else cfg_meta
            cfg_annotations = (cfg_meta.annotations if cfg_meta else None) or {}

            # get initialize flag
            initialize = cfg_annotations.get(self._INITIALIZE)

            # get global dynamic configuration
            raw_config = cfg_annotations.get(self._CONFIG) or '{}'
            config = ClusterConfig.from_node(cfg_version, raw_config)

            # get timeline history
            raw_history = cfg_annotations.get(self._HISTORY) or '[]'
            history = TimelineHistory.from_node(cfg_version, raw_history)

            leader_obj = nodes.get(self.leader_path)
            leader_meta = leader_obj.metadata if leader_obj else leader_obj
            self._leader_resource_version = leader_meta.resource_version if leader_meta else None
            self._leader_observed_subsets = leader_obj.subsets if self.__subsets and leader_obj else []
            leader_annotations = (leader_meta.annotations if leader_meta else None) or {}

            # get last leader operation
            raw_optime = leader_annotations.get(self._OPTIME)
            if raw_optime is None:
                last_leader_operation = 0
            else:
                last_leader_operation = int(raw_optime)

            # get leader
            record_keys = (self._LEADER, 'acquireTime', 'ttl', 'renewTime', 'transitions')
            leader_record = {k: leader_annotations.get(k) for k in record_keys if k in leader_annotations}

            # remember when a different leader record was first observed
            seen_record = self._leader_observed_record
            if (leader_record or seen_record) and leader_record != seen_record:
                self._leader_observed_record = leader_record
                self._leader_observed_time = time.time()

            leader_name = leader_record.get(self._LEADER)
            try:
                ttl = int(leader_record.get('ttl'))
                if not ttl:
                    ttl = self._ttl
            except (TypeError, ValueError):
                ttl = self._ttl

            # no leader object, nothing observed yet, or the record is older than ttl
            observed_at = self._leader_observed_time
            if not leader_meta or not observed_at or observed_at + ttl < time.time():
                leader_name = None

            if leader_meta:
                # fall back to a placeholder member (index -1) when the leader
                # is not among the listed pods
                matches = [m for m in members if m.name == leader_name]
                member = matches[0] if matches else Member(-1, leader_name, None, {})
                leader = Leader(kind_list.metadata.resource_version, None, member)
            else:
                leader = leader_name

            # failover key
            failover_obj = nodes.get(self.failover_path)
            failover_meta = failover_obj.metadata if failover_obj else failover_obj
            failover = Failover.from_node(
                failover_meta.resource_version if failover_meta else failover_meta,
                failover_meta.annotations if failover_meta else failover_meta)

            # get synchronization state
            sync_obj = nodes.get(self.sync_path)
            sync_meta = sync_obj.metadata if sync_obj else sync_obj
            sync = SyncState.from_node(
                sync_meta.resource_version if sync_meta else sync_meta,
                sync_meta.annotations if sync_meta else sync_meta)

            self._cluster = Cluster(initialize, config, leader, last_leader_operation,
                                    members, failover, sync, history)
        except Exception:
            logger.exception('get_cluster')
            raise KubernetesError('Kubernetes API is not responding properly')