def connectToUpstreamAdmin(self):
    """Open a client connection to the upstream admin node.

    Nothing is done when there is no listening connection (i.e. the
    application is not running). Otherwise ``upstream_admin_conn`` is
    reset, and connection attempts are repeated until one of them returns
    a connection that is not already closed; that connection is then
    stored in ``upstream_admin_conn``.
    """
    if not self.listening_conn:
        # not running
        return
    self.upstream_admin_conn = None
    upstream_conn = ClientConnection(
        self, self.upstream_admin_handler, self.upstream_admin)
    while upstream_conn.isClosed():
        # The attempt was closed right away: try again.
        upstream_conn = ClientConnection(
            self, self.upstream_admin_handler, self.upstream_admin)
    self.upstream_admin_conn = upstream_conn
def _nextPartition(self):
    """Select the next partition to replicate and reach its source node.

    Does nothing when a partition is already being processed
    (``current_partition`` is set) or when ``replicate_dict`` is empty.
    Otherwise a partition offset is chosen, the source node is resolved,
    ``current_partition`` / ``current_node`` are updated, and either the
    existing connection is reused or a new identification request is sent.
    Finally, the connection to the previously used node is released.
    """
    # XXX: One connection to another storage may remain open forever.
    #      All other previous connections are automatically closed
    #      after some time of inactivity.
    #      This should be improved in several ways:
    #      - Keeping connections open between 2 clusters (backup case) is
    #        quite a good thing because establishing a connection costs
    #        time/bandwidth and replication is actually never finished.
    #      - When all storages of a non-backup cluster are up-to-date,
    #        there's no reason to keep any connection open.
    if self.current_partition is not None or not self.replicate_dict:
        return
    app = self.app
    # Choose a partition with no unfinished transaction if possible.
    # If every candidate has a max_ttid, the loop ends without 'break'
    # and 'offset' is simply the last key iterated.
    # XXX: When leaving backup mode, we should only consider UP_TO_DATE
    #      cells.
    for offset in self.replicate_dict:
        if not self.partition_dict[offset].max_ttid:
            break
    try:
        addr, name = self.source_dict[offset]
    except KeyError:
        # No explicit source for this partition: pick at random a running
        # node among those having a readable cell for it.
        assert app.pt.getCell(offset, app.uuid).isOutOfDate()
        node = random.choice([
            cell.getNode()
            for cell in app.pt.getCellList(offset, readable=True)
            if cell.getNodeState() == NodeStates.RUNNING
        ])
        name = None
    else:
        # An explicit source (address, name) is registered: reuse the
        # known node for this address or create it.
        node = app.nm.getByAddress(addr)
        if node is None:
            assert name, addr
            node = app.nm.createStorage(address=addr)
    self.current_partition = offset
    previous_node = self.current_node
    self.current_node = node
    if node.isConnected(connecting=True):
        # Already connected (or connecting). Only start fetching if the
        # node is identified; otherwise nothing is started from here.
        if node.isIdentified():
            node.getConnection().asClient()
            self.fetchTransactions()
    else:
        assert name or node.getUUID() != app.uuid, "loopback connection"
        conn = ClientConnection(app, StorageOperationHandler(app), node)
        try:
            # When a source name is given, it is sent instead of our own
            # cluster name and no UUID is sent.
            conn.ask(
                Packets.RequestIdentification(NodeTypes.STORAGE,
                    None if name else app.uuid,
                    app.server,
                    name or app.name))
        except ConnectionClosed:
            # self.current_node is read again on purpose (instead of using
            # 'node'): if it is now the previous node, that connection
            # must not be closed below.
            if previous_node is self.current_node:
                return
    if previous_node is not None and previous_node.isConnected():
        app.closeClient(previous_node.getConnection())
def connect(node, uuid=app.uuid, name=app.name):
    """Make sure there is a client connection to `node` and register it.

    `app` and `self` are taken from the enclosing scope (not visible in
    this block). Our own node is skipped. For any other node, an existing
    (or pending) connection is reused as a client one; otherwise a new
    connection is opened and an identification request carrying `uuid`
    and `name` is sent. The connection is then recorded in
    ``self.conn_dict``, mapped to whether the node is already identified.
    """
    if node.getUUID() == app.uuid:
        # This is us: nothing to connect to.
        return
    if not node.isConnected(connecting=True):
        peer_conn = ClientConnection(app, StorageOperationHandler(app), node)
        peer_conn.ask(Packets.RequestIdentification(
            NodeTypes.STORAGE, uuid, app.server, name))
    else:
        peer_conn = node.getConnection()
        peer_conn.asClient()
    self.conn_dict[peer_conn] = node.isIdentified()
def _nextPartition(self):
    """Start replicating another partition, if none is in progress.

    Returns immediately when ``current_partition`` is set or when there is
    nothing left in ``replicate_dict``. Otherwise it picks a partition,
    resolves the node to replicate from, records both in
    ``current_partition`` / ``current_node`` and makes sure a connection
    to that node exists. The connection to the node used previously is
    released at the end.
    """
    # XXX: A single connection to another storage may stay open forever,
    #      whereas all other previous connections get closed automatically
    #      after some time of inactivity. Possible improvements:
    #      - Between 2 clusters (backup case), keeping connections open is
    #        rather a good thing: establishing a connection costs
    #        time/bandwidth and replication actually never finishes.
    #      - Once all storages of a non-backup cluster are up-to-date,
    #        no connection needs to stay open.
    if self.current_partition is not None:
        return
    if not self.replicate_dict:
        return
    app = self.app
    # Prefer a partition without unfinished transaction. When there is
    # none, the last iterated offset is used.
    # XXX: Only UP_TO_DATE cells should be considered when leaving
    #      backup mode.
    for offset in self.replicate_dict:
        if not self.partition_dict[offset].max_ttid:
            break
    try:
        addr, name = self.source_dict[offset]
    except KeyError:
        # No registered source: choose randomly among running nodes that
        # have a readable cell for this partition.
        assert app.pt.getCell(offset, app.uuid).isOutOfDate()
        running_node_list = [
            cell.getNode()
            for cell in app.pt.getCellList(offset, readable=True)
            if cell.getNodeState() == NodeStates.RUNNING]
        node = random.choice(running_node_list)
        name = None
    else:
        node = app.nm.getByAddress(addr)
        if node is None:
            assert name, addr
            node = app.nm.createStorage(address=addr)
    self.current_partition = offset
    former_node = self.current_node
    self.current_node = node
    if not node.isConnected(connecting=True):
        assert name or node.getUUID() != app.uuid, "loopback connection"
        conn = ClientConnection(app, StorageOperationHandler(app), node)
        try:
            conn.ask(Packets.RequestIdentification(
                NodeTypes.STORAGE,
                None if name else app.uuid,
                app.server,
                name or app.name))
        except ConnectionClosed:
            # Compare with the attribute, not with the local 'node'.
            if former_node is self.current_node:
                return
    elif node.isIdentified():
        node.getConnection().asClient()
        self.fetchTransactions()
    if former_node is None:
        return
    if former_node.isConnected():
        app.closeClient(former_node.getConnection())
def __getConnection(self):
    """Return the connection to the server, connecting first if needed.

    When not connected, a new client connection is created and the event
    manager is polled until ``self.connected`` becomes true.

    Raises NotReadyException if the connection gets dropped (i.e.
    ``self.connection`` is None) before being established.
    """
    if self.connected:
        return self.connection
    self.connection = ClientConnection(self, self.handler, self.server)
    # Reconnection to master is never delayed: unit tests run faster and
    # normal use should not be affected.
    try:
        self.connection.setReconnectionNoDelay()
    except ConnectionClosed:
        self.connection = None
    while not self.connected:
        if self.connection is None:
            raise NotReadyException('not connected')
        self.em.poll(1)
    return self.connection
def playSecondaryRole(self):
    """
    Act as a secondary master: wait for the primary master to be known,
    reconnect to it, then poll forever.

    Raises ElectionFailure if no primary master is announced in time.
    """
    logging.info('play the secondary role with %r', self.listening_conn)

    # The primary master should announce itself. If it takes too long,
    # it is probably down.
    # XXX: Same remark as in electPrimary.
    deadline = time() + 10
    while self.primary_master_node is None:
        self.em.poll(1)
        if time() > deadline:
            # election timeout
            raise ElectionFailure("Election timeout")
    self.master_address_dict.clear()

    # Start again from scratch: every connection but the listening one is
    # dropped. Not optimized, but lower level code has to be stabilized
    # first.
    for conn in self.em.getConnectionList():
        if conn.isListening():
            continue
        conn.close()

    # Connect again to the primary master node...
    ClientConnection(self, secondary.PrimaryHandler(self),
        self.primary_master_node)

    # ... and change the handler used for future incoming connections.
    incoming_handler = identification.SecondaryIdentificationHandler(self)
    self.listening_conn.setHandler(incoming_handler)

    while True:
        self.em.poll(1)
def getLoopbackConnection(self):
    """Yield a client connection from a master application to itself.

    A MasterApplication is created with a listening connection, and a
    ClientConnection targeting the address of that listening connection
    is yielded. The application is always closed when the generator is
    finalized.
    NOTE(review): presumably wrapped as a context manager by a decorator
    that is not part of this block -- confirm.
    """
    master_app = MasterApplication(address=BIND,
        getSSL=NEOCluster.SSL, getReplicas=0, getPartitions=1)
    try:
        handler = EventHandler(master_app)
        master_app.listening_conn = ListeningConnection(
            master_app, handler, master_app.server)
        # The node to connect to is the application itself.
        loopback_node = master_app.nm.createMaster(
            address=master_app.listening_conn.getAddress(),
            uuid=master_app.uuid)
        yield ClientConnection(master_app, handler, loopback_node)
    finally:
        master_app.close()
def getLoopbackConnection(self):
    """Return a resettable client connection from a master app to itself.

    The returned ClientConnection targets the listening connection of a
    freshly created MasterApplication. It has an extra ``reset()``
    attribute that wipes the instance state and runs ``__init__`` again
    with the same arguments.
    """
    master_app = MasterApplication(getSSL=NEOCluster.SSL,
        getReplicas=0, getPartitions=1)
    handler = EventHandler(master_app)
    master_app.listening_conn = ListeningConnection(
        master_app, handler, master_app.server)
    loopback_node = master_app.nm.createMaster(
        address=master_app.listening_conn.getAddress(),
        uuid=master_app.uuid)
    # Allocate without initializing: initialization is done by reset().
    loopback = ClientConnection.__new__(ClientConnection)
    def reset():
        loopback.__dict__.clear()
        loopback.__init__(master_app, handler, loopback_node)
        # Clearing __dict__ also removed this attribute: put it back.
        loopback.reset = reset
    reset()
    return loopback
def _makeClientConnection(self):
    """Create a ClientConnection within the `dummy_connector` context.

    The connector of the new connection is kept in ``self.connector`` and
    the connection itself is returned.
    """
    with dummy_connector:
        client_conn = ClientConnection(self.app, self.handler, self.node)
    connector = client_conn.connector
    self.connector = connector
    return client_conn
class NeoCTL(BaseApplication):
    """Control application sending commands to an admin node.

    Each public method sends one request through the connection to the
    admin node, waits for the reply by polling the event manager, and
    raises RuntimeError if the reply is not of the expected kind.
    Replies are expected to be appended to ``response_queue`` (presumably
    by the handler; not visible here).
    """

    connection = None
    connected = False

    @classmethod
    def _buildOptionParser(cls):
        # XXX: Use argparse sub-commands.
        parser = cls.option_parser
        parser.description = "NEO Control node"
        parser('a', 'address', default='127.0.0.1:9999',
            parse=lambda x: util.parseNodeAddress(x, 9999),
            help="address of an admin node")
        parser.argument('cmd', nargs=argparse.REMAINDER,
            help="command to execute; if not supplied,"
                 " the list of available commands is displayed")

    def __init__(self, address, **kw):
        super(NeoCTL, self).__init__(**kw)
        self.server = self.nm.createAdmin(address=address)
        self.handler = CommandEventHandler(self)
        self.response_queue = []

    def __getConnection(self):
        """Return the connection to the admin node, connecting if needed.

        Raises NotReadyException if the connection is dropped before
        being established.
        """
        if not self.connected:
            self.connection = ClientConnection(self, self.handler, self.server)
            # Never delay reconnection to master. This speeds up unit tests
            # and it should not change anything for normal use.
            try:
                self.connection.setReconnectionNoDelay()
            except ConnectionClosed:
                self.connection = None
            while not self.connected:
                if self.connection is None:
                    raise NotReadyException('not connected')
                self.em.poll(1)
        return self.connection

    def __ask(self, packet):
        """Send `packet` and return the reply, as queued in response_queue.

        Raises NotReadyException if the connection is lost while waiting,
        or if the reply is an Error packet with the NOT_READY code.
        """
        # TODO: make thread-safe
        connection = self.__getConnection()
        connection.ask(packet)
        response_queue = self.response_queue
        assert len(response_queue) == 0
        while self.connected:
            self.em.poll(1)
            if response_queue:
                break
        else:
            # The loop ended because we got disconnected.
            raise NotReadyException('Connection closed')
        response = response_queue.pop()
        if response[0] == Packets.Error and \
           response[1] == ErrorCodes.NOT_READY:
            raise NotReadyException(response[2])
        return response

    def __askAck(self, packet):
        """Send `packet`, require an Error/ACK reply, return its message."""
        response = self.__ask(packet)
        if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
            raise RuntimeError(response)
        return response[2]

    def __askAnswer(self, packet, answer_type):
        """Send `packet`, require a reply of `answer_type`, return it whole."""
        response = self.__ask(packet)
        if response[0] != answer_type:
            raise RuntimeError(response)
        return response

    def enableStorageList(self, uuid_list):
        """
          Put all given storage nodes in "running" state.
        """
        return self.__askAck(Packets.AddPendingNodes(uuid_list))

    def tweakPartitionTable(self, uuid_list=(), dry_run=False):
        """Request a partition table tweak; return the answer's payload."""
        return self.__askAnswer(
            Packets.TweakPartitionTable(dry_run, uuid_list),
            Packets.AnswerTweakPartitionTable)[1:]

    def setNumReplicas(self, nr):
        """Send a SetNumReplicas request; return the ACK message."""
        return self.__askAck(Packets.SetNumReplicas(nr))

    def setClusterState(self, state):
        """
          Set cluster state.
        """
        return self.__askAck(Packets.SetClusterState(state))

    def _setNodeState(self, node, state):
        """
          Kill node, or remove it permanently
        """
        return self.__askAck(Packets.SetNodeState(node, state))

    def getClusterState(self):
        """
          Get cluster state.
        """
        return self.__askAnswer(
            Packets.AskClusterState(), Packets.AnswerClusterState)[1]

    def getLastIds(self):
        """Return the payload of the AnswerLastIDs reply."""
        return self.__askAnswer(
            Packets.AskLastIDs(), Packets.AnswerLastIDs)[1:]

    def getLastTransaction(self):
        """Return the first field of the AnswerLastTransaction reply."""
        return self.__askAnswer(
            Packets.AskLastTransaction(), Packets.AnswerLastTransaction)[1]

    def getRecovery(self):
        """Return the payload of the AnswerRecovery reply."""
        return self.__askAnswer(
            Packets.AskRecovery(), Packets.AnswerRecovery)[1:]

    def getNodeList(self, node_type=None):
        """
          Get a list of nodes, filtering with given type.
        """
        return self.__askAnswer(
            Packets.AskNodeList(node_type),
            Packets.AnswerNodeList)[1] # node_list

    def getPartitionRowList(self, min_offset=0, max_offset=0, node=None):
        """
          Get a list of partition rows, bounded by min & max and involving
          given node.
        """
        return self.__askAnswer(
            Packets.AskPartitionList(min_offset, max_offset, node),
            Packets.AnswerPartitionList)[1:]

    def startCluster(self):
        """
          Set cluster into "verifying" state.
        """
        return self.setClusterState(ClusterStates.VERIFYING)

    def killNode(self, node):
        """Ask to put `node` in DOWN state."""
        return self._setNodeState(node, NodeStates.DOWN)

    def dropNode(self, node):
        """Ask to put `node` in UNKNOWN state."""
        return self._setNodeState(node, NodeStates.UNKNOWN)

    def getPrimary(self):
        """
          Return the primary master UUID.
        """
        return self.__askAnswer(
            Packets.AskPrimary(), Packets.AnswerPrimary)[1]

    def repair(self, *args):
        """Send a Repair request built from `args`; return the ACK message."""
        return self.__askAck(Packets.Repair(*args))

    def truncate(self, tid):
        """Send a Truncate request for `tid`; return the ACK message."""
        return self.__askAck(Packets.Truncate(tid))

    def checkReplicas(self, *args):
        """Send a CheckReplicas request built from `args`; return the ACK
        message."""
        return self.__askAck(Packets.CheckReplicas(*args))

    def flushLog(self):
        """Send a FlushLog packet and poll until the connection has nothing
        pending anymore. No reply is awaited."""
        conn = self.__getConnection()
        conn.send(Packets.FlushLog())
        while conn.pending():
            self.em.poll(1)
def electPrimary(self):
    """Elect a primary master node.

    The difficulty is that a master node must accept connections from
    others while attempting to connect to other master nodes at the
    same time. Note that storage nodes and client nodes may connect
    to self as well as master nodes.

    The election is retried from scratch each time ElectionFailure is
    raised. On return, ``self.primary`` is True if it was still None at
    the end of the election, and False otherwise.
    """
    logging.info('begin the election of a primary master')

    client_handler = election.ClientElectionHandler(self)
    self.unconnected_master_node_set.clear()
    self.negotiating_master_node_set.clear()
    self.master_address_dict.clear()
    self.listening_conn.setHandler(election.ServerElectionHandler(self))
    getByAddress = self.nm.getByAddress

    while True:

        # handle new connected masters
        for node in self.nm.getMasterList():
            node.setUnknown()
            self.unconnected_master_node_set.add(node.getAddress())

        # start the election process
        self.primary = None
        self.primary_master_node = None
        try:
            # Loop until there is no master left to connect to or to
            # negotiate with. Both sets are presumably updated by the
            # election handlers while polling (not visible here).
            while (self.unconnected_master_node_set or
                    self.negotiating_master_node_set):
                for addr in self.unconnected_master_node_set:
                    self.negotiating_master_node_set.add(addr)
                    ClientConnection(
                        self,
                        client_handler,
                        # XXX: Ugly, but the whole election code will be
                        # replaced soon
                        getByAddress(addr))
                self.unconnected_master_node_set.clear()
                self.em.poll(1)
        except ElectionFailure, m:
            # something goes wrong, clean then restart
            logging.error('election failed: %s', m)

            # Ask all connected nodes to reelect a single primary master.
            for conn in self.em.getClientList():
                conn.notify(Packets.ReelectPrimary())
                conn.abort()

            # Wait until the connections are closed.
            self.primary = None
            self.primary_master_node = None
            # XXX: Since poll does not wake up anymore every second,
            #      the following time condition should be reviewed.
            #      See also playSecondaryRole.
            t = time() + 10
            while self.em.getClientList() and time() < t:
                try:
                    self.em.poll(1)
                except ElectionFailure:
                    # Further failures are ignored while cleaning up.
                    pass

            # Close all connections.
            for conn in self.em.getClientList() + self.em.getServerList():
                conn.close()
        else:
            # election succeed, stop the process
            # (we are the primary iff nothing set self.primary meanwhile)
            self.primary = self.primary is None
            break
def run(self):
    """
    Recover the status about the cluster. Obtain the last OID, the last
    TID, and the last Partition Table ID from storage nodes, then get
    back the latest partition table or make a new table from scratch,
    if this is the first time.
    A new primary master may also arise during this phase.

    The method polls until startup conditions are met, marks the selected
    storage nodes as running, then either creates a partition table or
    outdates cells of the existing one, notifying admins of the result.
    """
    logging.info('begin the recovery of the status')
    app = self.app
    pt = app.pt
    app.changeClusterState(ClusterStates.RECOVERING)
    pt.clear()

    self.try_secondary = True

    # collect the last partition table available
    poll = app.em.poll
    while 1:
        if self.try_secondary:
            # Keep trying to connect to all other known masters,
            # to make sure there is a challenge between each pair
            # of masters in the cluster. If we win, all connections
            # opened here will be closed.
            self.try_secondary = False
            node_list = []
            for node in app.nm.getMasterList():
                if not (node is app._node or node.isConnected(True)):
                    # During recovery, master nodes are not put back in
                    # DOWN state by handlers. This is done
                    # entirely in this method (here and after this poll
                    # loop), to minimize the notification packets.
                    if not node.isDown():
                        node.setDown()
                        node_list.append(node)
                    ClientConnection(app, app.election_handler, node)
            if node_list:
                app.broadcastNodesInformation(node_list)
        poll(1)
        if pt.filled():
            # A partition table exists, we are starting an existing
            # cluster.
            node_list = pt.getOperationalNodeSet()
            if app._startup_allowed:
                node_list = [
                    node for node in node_list if node.isPending()
                ]
            elif node_list:
                # we want all nodes to be there if we're going to truncate
                if app.truncate_tid:
                    node_list = pt.getNodeSet()
                if not all(node.isPending() for node in node_list):
                    continue
        elif app._startup_allowed or app.autostart:
            # No partition table and admin allowed startup, we are
            # creating a new cluster out of all pending nodes.
            node_list = app.nm.getStorageList(only_identified=True)
            if not app._startup_allowed and len(node_list) < app.autostart:
                continue
        else:
            continue
        # Only go further when there are candidate nodes and none of
        # their connections is still pending.
        if node_list and not any(node.getConnection().isPending()
                                 for node in node_list):
            if pt.filled():
                if app.truncate_tid:
                    # Nodes whose entry in truncate_dict has no tid, or a
                    # tid greater than truncate_tid, are sent a Truncate
                    # packet, and the loop goes on polling.
                    node_list = app.nm.getIdentifiedList(
                        pool_set={
                            uuid
                            for uuid, tid in self.truncate_dict.iteritems()
                            if not tid or app.truncate_tid < tid
                        })
                    if node_list:
                        truncate = Packets.Truncate(app.truncate_tid)
                        for node in node_list:
                            conn = node.getConnection()
                            conn.send(truncate)
                            self.connectionCompleted(conn, False)
                        continue
                node_list = pt.getConnectedNodeList()
            break

    logging.info('startup allowed')

    for node in node_list:
        assert node.isPending(), node
        node.setRunning()

    # Drop connections to master nodes that are not identified, and make
    # sure they are all in DOWN state.
    for node in app.nm.getMasterList():
        if not (node is app._node or node.isIdentified()):
            if node.isConnected(True):
                node.getConnection().close()
                assert node.isDown(), node
            elif not node.isDown():
                assert self.try_secondary, node
                node.setDown()
                node_list.append(node)

    app.broadcastNodesInformation(node_list)

    if pt.getID() is None:
        logging.info('creating a new partition table')
        pt.make(node_list)
        self._notifyAdmins(
            Packets.SendPartitionTable(pt.getID(), pt.getRowList()))
    else:
        cell_list = pt.outdate()
        if cell_list:
            self._notifyAdmins(
                Packets.NotifyPartitionChanges(pt.setNextID(), cell_list))
        if app.backup_tid:
            pt.setBackupTidDict(self.backup_tid_dict)
            app.backup_tid = pt.getBackupTid()

    logging.debug('cluster starts this partition table:')
    pt.log()
class NeoCTL(BaseApplication):
    """Control application sending commands to an admin node.

    Each public method sends one request through the connection to the
    admin node, waits for the reply by polling the event manager, and
    raises RuntimeError if the reply is not of the expected kind.
    Replies are expected to be appended to ``response_queue`` (presumably
    by the handler; not visible here).
    """

    connection = None
    connected = False

    def __init__(self, address, **kw):
        super(NeoCTL, self).__init__(**kw)
        self.server = self.nm.createAdmin(address=address)
        self.handler = CommandEventHandler(self)
        self.response_queue = []

    def __getConnection(self):
        """Return the connection to the admin node, connecting if needed.

        Raises NotReadyException if the connection is dropped before
        being established.
        """
        if not self.connected:
            self.connection = ClientConnection(self, self.handler, self.server)
            # Never delay reconnection to master. This speeds up unit tests
            # and it should not change anything for normal use.
            try:
                self.connection.setReconnectionNoDelay()
            except ConnectionClosed:
                self.connection = None
            while not self.connected:
                if self.connection is None:
                    raise NotReadyException('not connected')
                self.em.poll(1)
        return self.connection

    def __ask(self, packet):
        """Send `packet` and return the reply, as queued in response_queue.

        Raises NotReadyException if the connection is lost while waiting,
        or if the reply is an Error packet with the NOT_READY code.
        """
        # TODO: make thread-safe
        connection = self.__getConnection()
        connection.ask(packet)
        response_queue = self.response_queue
        assert len(response_queue) == 0
        while self.connected:
            self.em.poll(1)
            if response_queue:
                break
        else:
            # The loop ended because we got disconnected.
            raise NotReadyException('Connection closed')
        response = response_queue.pop()
        if response[0] == Packets.Error and \
           response[1] == ErrorCodes.NOT_READY:
            raise NotReadyException(response[2])
        return response

    def __askAck(self, packet):
        """Send `packet`, require an Error/ACK reply, return its message."""
        response = self.__ask(packet)
        if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
            raise RuntimeError(response)
        return response[2]

    def __askAnswer(self, packet, answer_type):
        """Send `packet`, require a reply of `answer_type`, return it whole."""
        response = self.__ask(packet)
        if response[0] != answer_type:
            raise RuntimeError(response)
        return response

    def enableStorageList(self, uuid_list):
        """
          Put all given storage nodes in "running" state.
        """
        return self.__askAck(Packets.AddPendingNodes(uuid_list))

    def tweakPartitionTable(self, uuid_list=()):
        """Send a TweakPartitionTable request; return the ACK message."""
        return self.__askAck(Packets.TweakPartitionTable(uuid_list))

    def setClusterState(self, state):
        """
          Set cluster state.
        """
        return self.__askAck(Packets.SetClusterState(state))

    def _setNodeState(self, node, state):
        """
          Kill node, or remove it permanently
        """
        return self.__askAck(Packets.SetNodeState(node, state))

    def getClusterState(self):
        """
          Get cluster state.
        """
        return self.__askAnswer(
            Packets.AskClusterState(), Packets.AnswerClusterState)[1]

    def getLastIds(self):
        """Return the payload of the AnswerLastIDs reply."""
        return self.__askAnswer(
            Packets.AskLastIDs(), Packets.AnswerLastIDs)[1:]

    def getLastTransaction(self):
        """Return the first field of the AnswerLastTransaction reply."""
        return self.__askAnswer(
            Packets.AskLastTransaction(), Packets.AnswerLastTransaction)[1]

    def getRecovery(self):
        """Return the payload of the AnswerRecovery reply."""
        return self.__askAnswer(
            Packets.AskRecovery(), Packets.AnswerRecovery)[1:]

    def getNodeList(self, node_type=None):
        """
          Get a list of nodes, filtering with given type.
        """
        return self.__askAnswer(
            Packets.AskNodeList(node_type),
            Packets.AnswerNodeList)[1] # node_list

    def getPartitionRowList(self, min_offset=0, max_offset=0, node=None):
        """
          Get a list of partition rows, bounded by min & max and involving
          given node.
        """
        return self.__askAnswer(
            Packets.AskPartitionList(min_offset, max_offset, node),
            Packets.AnswerPartitionList)[1:3] # ptid, row_list

    def startCluster(self):
        """
          Set cluster into "verifying" state.
        """
        return self.setClusterState(ClusterStates.VERIFYING)

    def killNode(self, node):
        """Ask to put `node` in UNKNOWN state."""
        return self._setNodeState(node, NodeStates.UNKNOWN)

    def dropNode(self, node):
        """Ask to put `node` in DOWN state."""
        return self._setNodeState(node, NodeStates.DOWN)

    def getPrimary(self):
        """
          Return the primary master UUID.
        """
        return self.__askAnswer(
            Packets.AskPrimary(), Packets.AnswerPrimary)[1]

    def truncate(self, tid):
        """Send a Truncate request for `tid`; return the ACK message."""
        return self.__askAck(Packets.Truncate(tid))

    def checkReplicas(self, *args):
        """Send a CheckReplicas request built from `args`; return the ACK
        message."""
        return self.__askAck(Packets.CheckReplicas(*args))
def playSecondaryRole(self):
    """
    A master plays the secondary role when it is unlikely to win the
    election (it lost against another master during identification
    or it was notified that another is the primary master).
    Its only task is to try again to become the primary master when the
    latter fails. When connected to the cluster, the only communication is
    with the primary master, to stay informed about removed/added master
    nodes, and exit if requested.

    The method returns when every known master node has been tried
    without success since the last primary failure.
    """
    logging.info('play the secondary role with %r', self.listening_conn)
    self.primary = None
    handler = master.PrimaryHandler(self)
    # The connection to the probably-primary master can be in any state
    # depending on how we were informed. The only case in which it can not
    # be reused is when we have pending requests.
    if self.primary_master.isConnected(True):
        master_conn = self.primary_master.getConnection()
        # When we find the primary during identification, we don't attach
        # the connection (a server one) to any node, and it will be closed
        # in the below 'for' loop.
        assert master_conn.isClient(), master_conn
        try:
            # We want the handler to be effective immediately.
            # If it's not possible, let's just reconnect.
            if not master_conn.setHandler(handler):
                master_conn.close()
                # close() is expected to raise PrimaryFailure.
                assert False
        except PrimaryFailure:
            master_conn = None
    else:
        master_conn = None
    for conn in self.em.getConnectionList():
        if conn.isListening():
            conn.setHandler(
                identification.SecondaryIdentificationHandler(self))
        elif conn is not master_conn:
            conn.close()

    # Addresses that must not be tried (again); we never try ourselves.
    failed = {self.server}
    poll = self.em.poll
    while True:
        try:
            if master_conn is None:
                for node in self.nm.getMasterList():
                    node.setDown()
                node = self.primary_master
                failed.add(node.getAddress())
                if not node.isConnected(True):
                    # On immediate connection failure,
                    # PrimaryFailure is raised.
                    ClientConnection(self, handler, node)
            else:
                # The existing connection is reused for this first
                # iteration only.
                master_conn = None
            # Only an exception gets us out of this loop.
            while True:
                poll(1)
        except PrimaryFailure:
            if self.primary_master.isRunning():
                # XXX: What's the best to do here ? Another option is to
                #      choose the RUNNING master node with the lowest
                #      election key (i.e. (id_timestamp, address) as
                #      defined in IdentificationHandler), and return if we
                #      have the lowest one.
                failed = {self.server}
            else:
                # Since the last primary failure (or since we play the
                # secondary role), do not try any node more than once.
                for self.primary_master in self.nm.getMasterList():
                    if self.primary_master.getAddress() not in failed:
                        break
                else:
                    # All known master nodes are either down or secondary.
                    # Let's play the primary role again.
                    break
        except PrimaryElected as e:
            # Another master was elected: switch to it and drop the
            # connection to the one we considered primary so far.
            node = self.primary_master
            self.primary_master, = e.args
            assert node is not self.primary_master, node
            try:
                node.getConnection().close()
            except PrimaryFailure:
                pass