def become_leader(connection):
    """Promote this node to leader of the collective.

    Stops any active following and any pending restart attempt, records the
    local address of ``connection`` as the current leader, spawns an
    assimilation attempt for every other known member and finally schedules
    a rebalance.

    :param connection: Socket-like object.  ``getsockname()[0]`` becomes the
        recorded leader address; ``getpeername()[0]`` is excluded from the
        assimilation attempts.
    """
    global currentleader
    global follower
    global retrythread
    log.log({
        'info': 'Becoming leader of collective',
        'subsystem': 'collective'
    })
    # Compare against None instead of relying on truthiness: a greenlet is
    # falsy until it has actually started running (and again once dead), so
    # a truthiness test skips a follower that was spawned but not yet
    # scheduled and never cancels a retry still waiting in spawn_after().
    if follower is not None:
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread is not None:
        retrythread.cancel()
        retrythread = None
    currentleader = connection.getsockname()[0]
    skipaddr = connection.getpeername()[0]
    myname = get_myname()
    # Entries that must not be assimilated: everything in cfm.cfgstreams
    # (presumably peers that already follow us -- confirm), our own address
    # and the peer on the other end of ``connection``.
    skipem = set(cfm.cfgstreams)
    skipem.add(currentleader)
    skipem.add(skipaddr)
    for member in cfm.list_collective():
        dronecandidate = cfm.get_collective_member(member)['address']
        if dronecandidate in skipem or member == myname:
            continue
        eventlet.spawn_n(try_assimilate, dronecandidate)
    schedule_rebalance()
def follow_leader(remote, leader):
    """Follow the leader's channel until it ends, then recover.

    Runs ``cfm.follow_channel(remote)``.  When this greenlet is killed while
    following (``greenlet.GreenletExit``) that is logged as a clean close and
    nothing else is done.  Any other way out is treated as loss of the
    leader: following is stopped, ``currentleader`` is cleared and
    ``start_collective`` is spawned.

    :param remote: Connection handed to ``cfm.follow_channel``.
    :param leader: Leader identifier, used only in the log message.
    """
    global currentleader
    killed = False
    try:
        cfm.follow_channel(remote)
    except greenlet.GreenletExit:
        killed = True
    finally:
        # This also runs when follow_channel raised something other than
        # GreenletExit; that exception still propagates afterwards.
        if killed:
            log.log({
                'info': 'Previous following cleanly closed',
                'subsystem': 'collective'
            })
        else:
            notice = ('Current leader ({0}) has disappeared, restarting '
                      'collective membership'.format(leader))
            log.log({'info': notice, 'subsystem': 'collective'})
            # The leader has folded, time to startup again...
            cfm.stop_following()
            currentleader = None
            eventlet.spawn_n(start_collective)
def follow_leader(remote):
    """Follow the leader over ``remote`` and restart membership afterwards.

    Blocks in ``cfm.follow_channel(remote)``.  Once that call returns,
    following is stopped, ``currentleader`` is cleared and
    ``start_collective`` is spawned.

    :param remote: Connection handed to ``cfm.follow_channel``.
    """
    global currentleader
    cfm.follow_channel(remote)
    # Reaching this point means the channel ended: the leader has folded,
    # so tear down the follower state and start over.
    cfm.stop_following()
    currentleader = None
    eventlet.spawn_n(start_collective)
def start_collective():
    """Try to join the collective by contacting each other member in turn.

    Any existing follower is killed first.  Nothing further is attempted
    when ``cfm.check_quorum()`` succeeds while ``cfm.cfgstreams`` is
    non-empty, or while ``leader_init.active`` is set.  Otherwise every
    other member is tried in sorted order until ``connect_to_leader``
    reports success; if none does, a retry is scheduled roughly 30 seconds
    later.
    """
    global follower
    global retrythread
    # ``is not None`` rather than truthiness: a greenlet is falsy until it
    # has started running, so a freshly spawned follower would otherwise be
    # left alive and keep its stale reference.
    if follower is not None:
        follower.kill()
        cfm.stop_following()
        follower = None
    try:
        if cfm.cfgstreams:
            cfm.check_quorum()
            # Do not start if we have quorum and are leader
            return
    except exc.DegradedCollective:
        pass
    if leader_init.active:
        # do not start trying to connect if we are
        # xmitting data to a follower
        return
    myname = get_myname()
    for member in sorted(cfm.list_collective()):
        if member == myname:
            continue
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        ldrcandidate = cfm.get_collective_member(member)['address']
        log.log({
            'info': 'Performing startup attempt to {0}'.format(ldrcandidate),
            'subsystem': 'collective'
        })
        if connect_to_leader(name=myname, leader=ldrcandidate):
            break
    else:
        # No candidate accepted us; the random fraction staggers retries.
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
def start_collective():
    """Try to join the collective, probing all other members in parallel.

    Sets ``initting`` for the duration of the attempt.  Returns early when a
    follower already exists, when ``cfm.check_quorum()`` succeeds while
    ``cfm.cfgstreams`` is non-empty, or while ``leader_init.active`` is set.
    Otherwise connections to every other member are opened through a green
    pool and ``connect_to_leader`` is tried on each usable one until a
    follower exists; connections that are not used are closed.

    Whatever happens, if no follower and no retry exist afterwards a new
    attempt is scheduled roughly 5 seconds later.
    """
    global follower
    global retrythread
    global initting
    initting = True
    retrythread = None
    try:
        cfm.membership_callback = schedule_rebalance
        if follower is not None:
            initting = False
            return
        try:
            if cfm.cfgstreams:
                cfm.check_quorum()
                # Do not start if we have quorum and are leader
                return
        except exc.DegradedCollective:
            pass
        if leader_init.active:
            # do not start trying to connect if we are
            # xmitting data to a follower
            return
        myname = get_myname()
        connecto = []
        for member in sorted(cfm.list_collective()):
            if member == myname:
                continue
            if cfm.cfgleader is None:
                cfm.stop_following(True)
            ldrcandidate = cfm.get_collective_member(member)['address']
            connecto.append(ldrcandidate)
        conpool = greenpool.GreenPool(64)
        connections = conpool.imap(create_connection, connecto)
        for member, remote in connections:
            # A failed connection attempt is reported as an Exception
            # instance in place of the connection.
            if isinstance(remote, Exception):
                continue
            if follower is None:
                log.log({
                    'info': 'Performing startup attempt to {0}'.format(member),
                    'subsystem': 'collective'
                })
                if not connect_to_leader(
                        name=myname, leader=member, remote=remote):
                    remote.close()
            else:
                # Already following someone; this connection is surplus.
                remote.close()
    except Exception as e:
        # Startup stays best-effort (the finally clause schedules a retry),
        # but record the reason instead of discarding it silently.
        log.log({
            'error': 'Collective startup attempt failed: {0}'.format(str(e)),
            'subsystem': 'collective'
        })
    finally:
        if retrythread is None and follower is None:
            retrythread = eventlet.spawn_after(5 + random.random(),
                                               start_collective)
        initting = False
def start_collective():
    """Try to join the collective by contacting each other member in turn.

    Any existing follower is killed first.  Nothing further is attempted
    while ``leader_init.active`` is set.  Otherwise every other member is
    tried in sorted order until ``connect_to_leader`` reports success; if
    none does, a retry is scheduled roughly 30 seconds later.
    """
    global follower
    global retrythread
    # ``is not None`` rather than truthiness: a greenlet is falsy until it
    # has started running, so a freshly spawned follower would otherwise be
    # left alive and keep its stale reference.
    if follower is not None:
        follower.kill()
        follower = None
    if leader_init.active:
        # do not start trying to connect if we are
        # xmitting data to a follower
        return
    myname = get_myname()
    for member in sorted(cfm.list_collective()):
        if member == myname:
            continue
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        ldrcandidate = cfm.get_collective_member(member)['address']
        if connect_to_leader(name=myname, leader=ldrcandidate):
            break
    else:
        # No candidate accepted us; the random fraction staggers retries.
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
def follow_leader(remote, leader):
    """Follow the leader's channel until it ends, then redirect or recover.

    Runs ``cfm.follow_channel(remote)`` and inspects its result for a
    ``'newleader'`` entry.  Outcomes:

    * killed while following (``greenlet.GreenletExit``): logged as a clean
      close, nothing else is done;
    * the old leader named a new one and ``connect_to_leader`` to it
      succeeds: nothing else is done;
    * otherwise the leader is treated as lost: follower state is cleared
      and, unless a retry is already pending, ``start_collective`` is
      scheduled after a short random delay.

    :param remote: Connection handed to ``cfm.follow_channel``.
    :param leader: Leader identifier, used only in the log message.
    """
    global currentleader
    global retrythread
    global follower
    cleanexit = False
    newleader = None
    try:
        exitcause = cfm.follow_channel(remote)
        newleader = exitcause.get('newleader', None)
    except greenlet.GreenletExit:
        cleanexit = True
    finally:
        # Written as if/else rather than ``return`` inside ``finally``; an
        # unexpected exception from the try block still propagates after
        # the recovery branch has run.
        if cleanexit:
            log.log({
                'info': 'Previous following cleanly closed',
                'subsystem': 'collective'
            })
        else:
            redirected = False
            if newleader:
                log.log({
                    'info':
                    'Previous leader directed us to join new leader {}'.format(
                        newleader),
                    'subsystem': 'collective'
                })
                redirected = connect_to_leader(None, get_myname(), newleader)
            if not redirected:
                log.log({
                    'info': 'Current leader ({0}) has disappeared, restarting '
                            'collective membership'.format(leader),
                    'subsystem': 'collective'
                })
                # The leader has folded, time to startup again...
                follower = None
                cfm.stop_following()
                currentleader = None
                if retrythread is None:  # start a recovery
                    retrythread = eventlet.spawn_after(random.random(),
                                                       start_collective)
def become_leader(connection):
    """Promote this node to leader of the collective.

    Stops any follower greenlet and any pending retry, records the local
    address of ``connection`` as the current leader and asks
    ``_assimilate_missing`` to pull in other members, skipping the peer of
    ``connection``.  When that call reports a truthy result a rebalance is
    scheduled and the ``reassimilate`` background greenlet is restarted.

    :param connection: Socket-like object providing ``getsockname()`` and
        ``getpeername()``.
    """
    global currentleader
    global follower
    global retrythread
    global reassimilate
    announcement = {
        'info': 'Becoming leader of collective',
        'subsystem': 'collective'
    }
    log.log(announcement)

    # A leader neither follows anyone nor needs a pending startup retry.
    if follower is not None:
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread is not None:
        retrythread.cancel()
        retrythread = None

    currentleader = connection.getsockname()[0]
    peer_address = connection.getpeername()[0]
    if not _assimilate_missing(peer_address):
        return
    schedule_rebalance()
    # Replace any previous reassimilation worker with a fresh one.
    if reassimilate is not None:
        reassimilate.kill()
    reassimilate = eventlet.spawn(reassimilate_missing)
def connect_to_leader(cert=None, name=None, leader=None):
    """Connect to a prospective leader and start following it.

    Performs the protocol-v2 handshake, then, unless the peer answers with
    an error, replaces the local configuration with the database it
    transmits and spawns ``follow_leader`` on the connection.

    :param cert: Passed through to ``connect_to_collective``.
    :param name: Name to announce; defaults to ``get_myname()``.
    :param leader: Address to contact; defaults to ``currentleader``.
    :returns: ``True`` once following has started.  ``False`` when the
        connection fails, the peer sends no reply, asks us to back off or
        reports another error.  When the peer redirects us, the result of
        the recursive call for the new leader; when the peer has an
        inferior transaction count, the result of ``become_leader``.
    :raises Exception: on a protocol version other than v2, on being
        redirected to ourselves, or when the database transfer is cut
        short.  Errors while loading the transferred data are re-raised
        after rolling back.
    """
    global currentleader
    global follower
    if leader is None:
        leader = currentleader
    log.log({
        'info': 'Attempting connection to leader {0}'.format(leader),
        'subsystem': 'collective'
    })
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error as e:
        log.log({
            'error': 'Collective connection attempt to {0} failed: {1}'.format(
                leader, str(e)),
            'subsystem': 'collective'
        })
        return False
    with connecting:
        with cfm._initlock:
            banner = tlvdata.recv(remote)  # the banner
            vers = banner.split()[2]
            if vers != b'v2':
                raise Exception(
                    'This instance only supports protocol 2, synchronize versions between collective members'
                )
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(
                remote, {
                    'collective': {
                        'operation': 'connect',
                        'name': name,
                        'txcount': cfm._txcount
                    }
                })
            keydata = tlvdata.recv(remote)
            if not keydata:
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    log.log({
                        'info': 'Collective initialization in progress on '
                                '{0}'.format(leader),
                        'subsystem': 'collective'
                    })
                    return False
                if 'leader' in keydata:
                    log.log({
                        'info': 'Prospective leader {0} has redirected this '
                                'member to {1}'.format(leader,
                                                       keydata['leader']),
                        'subsystem': 'collective'
                    })
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    log.log({
                        'info': 'Prospective leader {0} has inferior '
                                'transaction count, becoming leader'
                                ''.format(leader),
                        'subsystem': 'collective'
                    })
                    return become_leader(remote)
                return False
            # ``is not None`` rather than truthiness: a greenlet is falsy
            # until it has started running, so a freshly spawned follower
            # would otherwise survive alongside the new one.
            if follower is not None:
                follower.kill()
                cfm.stop_following()
                follower = None
            log.log({
                'info': 'Following leader {0}'.format(leader),
                'subsystem': 'collective'
            })
            colldata = tlvdata.recv(remote)
            # the protocol transmits global data, but for now we ignore it;
            # it still has to be read to stay in step with the sender.
            tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            # Collect the raw database in chunks and join once at the end.
            chunks = []
            remaining = dbsize
            while remaining > 0:
                ndata = remote.recv(remaining)
                if not ndata:
                    try:
                        remote.close()
                    except Exception:
                        pass
                    raise Exception("Error doing initial DB transfer")
                chunks.append(ndata)
                remaining -= len(ndata)
            dbjson = b''.join(chunks)
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
        # spawn this as a thread...
        follower = eventlet.spawn(follow_leader, remote, leader)
    return True
def connect_to_leader(cert=None, name=None, leader=None):
    """Connect to a prospective leader and start following it.

    Performs the handshake, then, unless the peer answers with an error,
    replaces the local configuration with the collective members, globals
    and database it transmits and spawns ``follow_leader`` on the
    connection.

    :param cert: Passed through to ``connect_to_collective``.
    :param name: Name to announce; defaults to ``get_myname()``.
    :param leader: Address to contact; defaults to ``currentleader``.
    :returns: ``True`` once following has started, and also when the peer
        asked us to back off (a retry is scheduled in that case).  ``False``
        when the connection fails, the peer sends no reply or reports
        another error.  When the peer redirects us, the result of the
        recursive call for the new leader; when the peer reports a
        ``txcount`` error, the result of ``become_leader``.
    :raises Exception: on being redirected to ourselves or when the
        database transfer is cut short.  Errors while loading the
        transferred data are re-raised after rolling back.
    """
    global currentleader
    global cfginitlock
    global follower
    if cfginitlock is None:
        # Created on first use.
        cfginitlock = threading.RLock()
    if leader is None:
        leader = currentleader
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error:
        return False
    with connecting:
        with cfginitlock:
            tlvdata.recv(remote)  # the banner
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(
                remote, {
                    'collective': {
                        'operation': 'connect',
                        'name': name,
                        'txcount': cfm._txcount
                    }
                })
            keydata = tlvdata.recv(remote)
            if not keydata:
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    # Peer is busy; try again after a short random delay.
                    eventlet.spawn_after(random.random(), connect_to_leader,
                                         cert, name, leader)
                    return True
                if 'leader' in keydata:
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    return become_leader(remote)
                print(keydata['error'])
                return False
            if follower is not None:
                follower.kill()
                cfm.stop_following()
                follower = None
            colldata = tlvdata.recv(remote)
            globaldata = tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            # recv() yields bytes, so the database must be accumulated as
            # bytes; starting from a text '' fails on the first chunk under
            # Python 3.  Chunks are joined once at the end.
            chunks = []
            remaining = dbsize
            while remaining > 0:
                ndata = remote.recv(remaining)
                if not ndata:
                    try:
                        remote.close()
                    except Exception:
                        pass
                    raise Exception("Error doing initial DB transfer")
                chunks.append(ndata)
                remaining -= len(ndata)
            dbjson = b''.join(chunks)
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                for globvar in globaldata:
                    cfm.set_global(globvar, globaldata[globvar], False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
        # spawn this as a thread...
        follower = eventlet.spawn(follow_leader, remote)
    return True