Beispiel #1
0
 def connectToUpstreamAdmin(self):
     """(Re)create the client connection to the upstream admin node.

     Nothing happens when ``self.listening_conn`` is falsy (i.e. the
     application is not running). Otherwise the stored connection is
     cleared and a new ClientConnection is built repeatedly until one
     is not reported closed right after construction; that connection
     is stored in ``self.upstream_admin_conn``.
     """
     if not self.listening_conn:  # not running
         return
     self.upstream_admin_conn = None
     while True:
         upstream = ClientConnection(self, self.upstream_admin_handler,
                                     self.upstream_admin)
         if upstream.isClosed():
             # Closed right after construction: try again.
             continue
         self.upstream_admin_conn = upstream
         return
Beispiel #2
0
 def _nextPartition(self):
     """Select the next partition to replicate and reach its source node.

     Returns immediately when a partition is already being processed
     or when ``self.replicate_dict`` is empty. Otherwise a partition
     and a source node are chosen, recorded in
     ``self.current_partition`` / ``self.current_node``, and either
     the existing connection is reused or a new identification request
     is sent. The connection to the previously used node is closed at
     the end when that node is still connected.
     """
     # XXX: One connection to another storage may remain open forever.
     #      All other previous connections are automatically closed
     #      after some time of inactivity.
     #      This should be improved in several ways:
     #      - Keeping connections open between 2 clusters (backup case) is
     #        quite a good thing because establishing a connection costs
     #        time/bandwidth and replication is actually never finished.
     #      - When all storages of a non-backup cluster are up-to-date,
     #        there's no reason to keep any connection open.
     if self.current_partition is not None:
         return
     if not self.replicate_dict:
         return
     app = self.app
     # Choose a partition with no unfinished transaction if possible.
     # When every partition has one, the loop variable keeps the last
     # key that was iterated.
     # XXX: When leaving backup mode, we should only consider UP_TO_DATE
     #      cells.
     for partition in self.replicate_dict:
         if not self.partition_dict[partition].max_ttid:
             break
     try:
         source_addr, cluster_name = self.source_dict[partition]
     except KeyError:
         # No explicit source: pick a random running node among the
         # readable cells of this partition.
         assert app.pt.getCell(partition, app.uuid).isOutOfDate()
         candidates = [
             cell.getNode()
             for cell in app.pt.getCellList(partition, readable=True)
             if cell.getNodeState() == NodeStates.RUNNING
         ]
         source = random.choice(candidates)
         cluster_name = None
     else:
         source = app.nm.getByAddress(source_addr)
         if source is None:
             assert cluster_name, source_addr
             source = app.nm.createStorage(address=source_addr)
     self.current_partition = partition
     former_node = self.current_node
     self.current_node = source
     if not source.isConnected(connecting=True):
         assert cluster_name or source.getUUID() != app.uuid, \
             "loopback connection"
         conn = ClientConnection(app, StorageOperationHandler(app), source)
         try:
             conn.ask(
                 Packets.RequestIdentification(
                     NodeTypes.STORAGE,
                     None if cluster_name else app.uuid,
                     app.server,
                     cluster_name or app.name))
         except ConnectionClosed:
             if former_node is self.current_node:
                 return
     elif source.isIdentified():
         # Already connected and identified: reuse the connection.
         source.getConnection().asClient()
         self.fetchTransactions()
     if former_node is not None and former_node.isConnected():
         app.closeClient(former_node.getConnection())
Beispiel #3
0
 def connect(node, uuid=app.uuid, name=app.name):
     """Make sure there is a client connection to `node` and track it.

     Nodes whose UUID equals ``app.uuid`` are ignored. An existing (or
     connecting) connection is switched to client mode; otherwise a new
     connection is opened and an identification request is sent with
     the given `uuid` and `name`. The connection is then recorded in
     ``self.conn_dict`` with the value of ``node.isIdentified()``.
     """
     if node.getUUID() != app.uuid:
         if not node.isConnected(connecting=True):
             link = ClientConnection(app, StorageOperationHandler(app), node)
             link.ask(Packets.RequestIdentification(
                 NodeTypes.STORAGE, uuid, app.server, name))
         else:
             link = node.getConnection()
             link.asClient()
         self.conn_dict[link] = node.isIdentified()
Beispiel #4
0
 def _nextPartition(self):
     """Start replicating another partition, if none is in progress.

     Does nothing while ``self.current_partition`` is set or when
     ``self.replicate_dict`` is empty. Otherwise it chooses a
     partition, resolves the node to replicate from, updates
     ``self.current_partition`` and ``self.current_node``, then reuses
     or opens a connection to that node. Finally the connection to the
     node used before is closed if that node is still connected.
     """
     # XXX: One connection to another storage may remain open forever.
     #      All other previous connections are automatically closed
     #      after some time of inactivity.
     #      This should be improved in several ways:
     #      - Keeping connections open between 2 clusters (backup case) is
     #        quite a good thing because establishing a connection costs
     #        time/bandwidth and replication is actually never finished.
     #      - When all storages of a non-backup cluster are up-to-date,
     #        there's no reason to keep any connection open.
     may_start = self.current_partition is None and self.replicate_dict
     if not may_start:
         return
     app = self.app
     # Choose a partition with no unfinished transaction if possible
     # (otherwise the last iterated key is used).
     # XXX: When leaving backup mode, we should only consider UP_TO_DATE
     #      cells.
     for chosen in self.replicate_dict:
         if not self.partition_dict[chosen].max_ttid:
             break
     try:
         address, upstream_name = self.source_dict[chosen]
     except KeyError:
         assert app.pt.getCell(chosen, app.uuid).isOutOfDate()
         # Any running node holding a readable cell will do.
         running_nodes = [cell.getNode()
             for cell in app.pt.getCellList(chosen, readable=True)
             if cell.getNodeState() == NodeStates.RUNNING]
         target = random.choice(running_nodes)
         upstream_name = None
     else:
         target = app.nm.getByAddress(address)
         if target is None:
             assert upstream_name, address
             target = app.nm.createStorage(address=address)
     self.current_partition = chosen
     last_node = self.current_node
     self.current_node = target
     if target.isConnected(connecting=True):
         if target.isIdentified():
             existing = target.getConnection()
             existing.asClient()
             self.fetchTransactions()
     else:
         assert upstream_name or target.getUUID() != app.uuid, \
             "loopback connection"
         new_conn = ClientConnection(app, StorageOperationHandler(app),
                                     target)
         try:
             new_conn.ask(Packets.RequestIdentification(
                 NodeTypes.STORAGE,
                 None if upstream_name else app.uuid,
                 app.server,
                 upstream_name or app.name))
         except ConnectionClosed:
             if last_node is self.current_node:
                 return
     if last_node is not None and last_node.isConnected():
         app.closeClient(last_node.getConnection())
Beispiel #5
0
 def __getConnection(self):
     """Return ``self.connection``, opening it first when not connected.

     A new ClientConnection is created and the event manager is polled
     until ``self.connected`` becomes true. NotReadyException is raised
     as soon as ``self.connection`` is found to be None while waiting.
     """
     if self.connected:
         return self.connection
     self.connection = ClientConnection(self, self.handler, self.server)
     # Never delay reconnection to master. This speeds up unit tests
     # and it should not change anything for normal use.
     try:
         self.connection.setReconnectionNoDelay()
     except ConnectionClosed:
         self.connection = None
     while True:
         if self.connected:
             return self.connection
         if self.connection is None:
             raise NotReadyException('not connected')
         self.em.poll(1)
Beispiel #6
0
    def playSecondaryRole(self):
        """
        Act as a secondary master: wait for the primary master to be
        known, reconnect to it, then poll forever.

        Raises ElectionFailure when no primary master is known once the
        10 second deadline has passed. Unless an exception propagates
        from the event loop, this method never returns.
        """
        logging.info('play the secondary role with %r', self.listening_conn)

        # Wait for an announcement. If this is too long, probably
        # the primary master is down.
        # XXX: Same remark as in electPrimary.
        deadline = time() + 10
        while self.primary_master_node is None:
            self.em.poll(1)
            if time() > deadline:
                # election timeout
                raise ElectionFailure("Election timeout")
        self.master_address_dict.clear()

        # Restart completely. Non-optimized
        # but lower level code needs to be stabilized first.
        for conn in self.em.getConnectionList():
            if conn.isListening():
                continue
            conn.close()

        # Reconnect to primary master node.
        ClientConnection(self, secondary.PrimaryHandler(self),
                         self.primary_master_node)

        # and another for the future incoming connections
        incoming_handler = identification.SecondaryIdentificationHandler(self)
        self.listening_conn.setHandler(incoming_handler)

        while True:
            self.em.poll(1)
Beispiel #7
0
 def getLoopbackConnection(self):
     """Yield a client connection from a master application to itself.

     A MasterApplication is created, given a listening connection, and
     a ClientConnection targeting that listening address is yielded.
     The application is closed when the generator is finalized, even
     on error.
     """
     app = MasterApplication(address=BIND,
         getSSL=NEOCluster.SSL, getReplicas=0, getPartitions=1)
     try:
         event_handler = EventHandler(app)
         app.listening_conn = ListeningConnection(
             app, event_handler, app.server)
         # The target node is the application itself (same uuid,
         # address of its own listening connection).
         loopback_node = app.nm.createMaster(
             address=app.listening_conn.getAddress(), uuid=app.uuid)
         yield ClientConnection(app, event_handler, loopback_node)
     finally:
         app.close()
Beispiel #8
0
 def __getConnection(self):
     """Give back the current connection, establishing it if necessary.

     When ``self.connected`` is false, a ClientConnection is created
     and the event manager is polled until the connected flag is set.
     Raises NotReadyException if ``self.connection`` is None while
     still not connected.
     """
     if self.connected:
         return self.connection
     self.connection = ClientConnection(self, self.handler, self.server)
     # Never delay reconnection to master. This speeds up unit tests
     # and it should not change anything for normal use.
     try:
         self.connection.setReconnectionNoDelay()
     except ConnectionClosed:
         self.connection = None
     while not self.connected:
         if self.connection is None:
             raise NotReadyException('not connected')
         self.em.poll(1)
     return self.connection
Beispiel #9
0
 def getLoopbackConnection(self):
     """Return a client connection from a master application to itself.

     The returned connection carries a ``reset`` attribute: calling it
     wipes the instance dictionary and runs ``__init__`` again on the
     very same object, with the same application, handler and node.
     """
     master = MasterApplication(getSSL=NEOCluster.SSL,
         getReplicas=0, getPartitions=1)
     event_handler = EventHandler(master)
     master.listening_conn = ListeningConnection(
         master, event_handler, master.server)
     loopback_node = master.nm.createMaster(
         address=master.listening_conn.getAddress(), uuid=master.uuid)
     # Allocate without initializing: reset() below performs (and can
     # later repeat) the initialization on this same object.
     loopback = ClientConnection.__new__(ClientConnection)
     def reset():
         loopback.__dict__.clear()
         loopback.__init__(master, event_handler, loopback_node)
         # clear() removed the attribute, so install it again.
         loopback.reset = reset
     reset()
     return loopback
Beispiel #10
0
    def getLoopbackConnection(self):
        """Create a master application and connect it to itself.

        Returns the ClientConnection, which has a ``reset`` attribute
        that re-initializes the same object in place (its instance
        dictionary is cleared and ``__init__`` is called again).
        """
        application = MasterApplication(
            getSSL=NEOCluster.SSL, getReplicas=0, getPartitions=1)
        handler = EventHandler(application)
        application.listening_conn = ListeningConnection(
            application, handler, application.server)
        own_node = application.nm.createMaster(
            address=application.listening_conn.getAddress(),
            uuid=application.uuid)
        # The object is created uninitialized; reset() does the rest.
        connection = ClientConnection.__new__(ClientConnection)

        def reset():
            attributes = connection.__dict__
            attributes.clear()
            connection.__init__(application, handler, own_node)
            # Re-attach the hook wiped by clear().
            connection.reset = reset

        reset()
        return connection
Beispiel #11
0
 def _makeClientConnection(self):
     """Build a ClientConnection within the ``dummy_connector`` context.

     The connection's connector is stored in ``self.connector`` and
     the connection itself is returned.
     """
     with dummy_connector:
         client = ClientConnection(self.app, self.handler, self.node)
     self.connector = client.connector
     return client
Beispiel #12
0
class NeoCTL(BaseApplication):
    """Control application sending commands to an admin node.

    Each public method sends a single packet over a lazily opened
    connection and blocks, polling the event manager, until an answer
    has been appended to ``response_queue``.
    NOTE(review): ``connected`` and ``response_queue`` are presumably
    updated by CommandEventHandler - confirm against the handler.
    """

    connection = None
    connected = False

    @classmethod
    def _buildOptionParser(cls):
        """Declare the command-line options of the control tool."""
        # XXX: Use argparse sub-commands.
        parser = cls.option_parser
        parser.description = "NEO Control node"
        parser('a',
               'address',
               default='127.0.0.1:9999',
               parse=lambda x: util.parseNodeAddress(x, 9999),
               help="address of an admin node")
        parser.argument('cmd',
                        nargs=argparse.REMAINDER,
                        help="command to execute; if not supplied,"
                        " the list of available commands is displayed")

    def __init__(self, address, **kw):
        """Register the admin node at `address`; no connection is opened."""
        super(NeoCTL, self).__init__(**kw)
        self.server = self.nm.createAdmin(address=address)
        self.handler = CommandEventHandler(self)
        self.response_queue = []

    def __getConnection(self):
        """Return the connection to the admin node, opening it if needed.

        Raises NotReadyException if the connection attempt fails.
        """
        if not self.connected:
            self.connection = ClientConnection(self, self.handler, self.server)
            # Never delay reconnection to master. This speeds up unit tests
            # and it should not change anything for normal use.
            try:
                self.connection.setReconnectionNoDelay()
            except ConnectionClosed:
                self.connection = None
            while not self.connected:
                if self.connection is None:
                    raise NotReadyException('not connected')
                self.em.poll(1)
        return self.connection

    def __ask(self, packet):
        """Send `packet` and block until its response is available.

        Returns the response popped from ``response_queue``. Raises
        NotReadyException when the connection is lost before a response
        arrives, or when the response is an Error packet with code
        ErrorCodes.NOT_READY.
        """
        # TODO: make thread-safe
        connection = self.__getConnection()
        connection.ask(packet)
        response_queue = self.response_queue
        assert len(response_queue) == 0
        while self.connected:
            self.em.poll(1)
            if response_queue:
                break
        else:
            # The loop ended because the connection was lost, not
            # because a response arrived.
            raise NotReadyException('Connection closed')
        response = response_queue.pop()
        if response[0] == Packets.Error and \
           response[1] == ErrorCodes.NOT_READY:
            raise NotReadyException(response[2])
        return response

    def __askAck(self, packet):
        """Send `packet`, require an ACK and return ``response[2]``.

        Raises RuntimeError(response) for any other response.
        """
        response = self.__ask(packet)
        if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
            raise RuntimeError(response)
        return response[2]

    def __askAnswer(self, packet, answer_type):
        """Send `packet` and return the response if it is `answer_type`.

        Raises RuntimeError(response) when ``response[0]`` differs.
        """
        response = self.__ask(packet)
        if response[0] != answer_type:
            raise RuntimeError(response)
        return response

    def enableStorageList(self, uuid_list):
        """
          Put all given storage nodes in "running" state.
        """
        return self.__askAck(Packets.AddPendingNodes(uuid_list))

    def tweakPartitionTable(self, uuid_list=(), dry_run=False):
        """Request a partition table tweak; return the answer's fields."""
        return self.__askAnswer(
            Packets.TweakPartitionTable(dry_run, uuid_list),
            Packets.AnswerTweakPartitionTable)[1:]

    def setNumReplicas(self, nr):
        """Send SetNumReplicas(nr) and return the acknowledgement."""
        return self.__askAck(Packets.SetNumReplicas(nr))

    def setClusterState(self, state):
        """
          Set cluster state.
        """
        return self.__askAck(Packets.SetClusterState(state))

    def _setNodeState(self, node, state):
        """
          Kill node, or remove it permanently
        """
        return self.__askAck(Packets.SetNodeState(node, state))

    def getClusterState(self):
        """
          Get cluster state.
        """
        return self.__askAnswer(Packets.AskClusterState(),
                                Packets.AnswerClusterState)[1]

    def getLastIds(self):
        """Return the fields of the AnswerLastIDs response."""
        return self.__askAnswer(Packets.AskLastIDs(),
                                Packets.AnswerLastIDs)[1:]

    def getLastTransaction(self):
        """Return the first field of the AnswerLastTransaction response."""
        return self.__askAnswer(Packets.AskLastTransaction(),
                                Packets.AnswerLastTransaction)[1]

    def getRecovery(self):
        """Return the fields of the AnswerRecovery response."""
        return self.__askAnswer(Packets.AskRecovery(),
                                Packets.AnswerRecovery)[1:]

    def getNodeList(self, node_type=None):
        """
          Get a list of nodes, filtering with given type.
        """
        return self.__askAnswer(Packets.AskNodeList(node_type),
                                Packets.AnswerNodeList)[1]  # node_list

    def getPartitionRowList(self, min_offset=0, max_offset=0, node=None):
        """
          Get a list of partition rows, bounded by min & max and involving
          given node.
        """
        return self.__askAnswer(
            Packets.AskPartitionList(min_offset, max_offset, node),
            Packets.AnswerPartitionList)[1:]

    def startCluster(self):
        """
          Set cluster into "verifying" state.
        """
        return self.setClusterState(ClusterStates.VERIFYING)

    def killNode(self, node):
        """Ask to put `node` in DOWN state."""
        return self._setNodeState(node, NodeStates.DOWN)

    def dropNode(self, node):
        """Ask to put `node` in UNKNOWN state."""
        return self._setNodeState(node, NodeStates.UNKNOWN)

    def getPrimary(self):
        """
          Return the primary master UUID.
        """
        return self.__askAnswer(Packets.AskPrimary(),
                                Packets.AnswerPrimary)[1]

    def repair(self, *args):
        """Send Repair(*args) and return the acknowledgement."""
        return self.__askAck(Packets.Repair(*args))

    def truncate(self, tid):
        """Send Truncate(tid) and return the acknowledgement."""
        return self.__askAck(Packets.Truncate(tid))

    def checkReplicas(self, *args):
        """Send CheckReplicas(*args) and return the acknowledgement."""
        return self.__askAck(Packets.CheckReplicas(*args))

    def flushLog(self):
        """Send FlushLog and poll until the connection has nothing pending."""
        conn = self.__getConnection()
        conn.send(Packets.FlushLog())
        while conn.pending():
            self.em.poll(1)
Beispiel #13
0
    def electPrimary(self):
        """Elect a primary master node.

        The difficulty is that a master node must accept connections from
        others while attempting to connect to other master nodes at the
        same time. Note that storage nodes and client nodes may connect
        to self as well as master nodes.

        Election rounds are repeated until one completes without
        ElectionFailure. On return, ``self.primary`` is True when it was
        still None at the end of the round and False otherwise.
        NOTE(review): presumably True means this node is the primary -
        confirm against the election handlers."""
        logging.info('begin the election of a primary master')

        client_handler = election.ClientElectionHandler(self)
        self.unconnected_master_node_set.clear()
        self.negotiating_master_node_set.clear()
        self.master_address_dict.clear()
        self.listening_conn.setHandler(election.ServerElectionHandler(self))
        getByAddress = self.nm.getByAddress

        while True:

            # handle new connected masters
            for node in self.nm.getMasterList():
                node.setUnknown()
                self.unconnected_master_node_set.add(node.getAddress())

            # start the election process
            self.primary = None
            self.primary_master_node = None
            try:
                # Loop until there is no master left to connect to or
                # to negotiate with.
                while (self.unconnected_master_node_set
                       or self.negotiating_master_node_set):
                    for addr in self.unconnected_master_node_set:
                        self.negotiating_master_node_set.add(addr)
                        ClientConnection(
                            self,
                            client_handler,
                            # XXX: Ugly, but the whole election code will be
                            # replaced soon
                            getByAddress(addr))
                    self.unconnected_master_node_set.clear()
                    self.em.poll(1)
            except ElectionFailure as m:
                # something goes wrong, clean then restart
                logging.error('election failed: %s', m)

                # Ask all connected nodes to reelect a single primary master.
                for conn in self.em.getClientList():
                    conn.notify(Packets.ReelectPrimary())
                    conn.abort()

                # Wait until the connections are closed.
                self.primary = None
                self.primary_master_node = None
                # XXX: Since poll does not wake up anymore every second,
                #      the following time condition should be reviewed.
                #      See also playSecondaryRole.
                t = time() + 10
                while self.em.getClientList() and time() < t:
                    try:
                        self.em.poll(1)
                    except ElectionFailure:
                        # Already restarting: further failures are ignored.
                        pass

                # Close all connections.
                for conn in self.em.getClientList() + self.em.getServerList():
                    conn.close()
            else:
                # election succeed, stop the process
                self.primary = self.primary is None
                break
Beispiel #14
0
    def run(self):
        """
        Recover the status about the cluster. Obtain the last OID, the last
        TID, and the last Partition Table ID from storage nodes, then get
        back the latest partition table or make a new table from scratch,
        if this is the first time.
        A new primary master may also arise during this phase.

        The method polls the event manager until startup conditions are
        met, then marks the selected nodes as running, broadcasts node
        changes, and either creates a new partition table or outdates
        cells of the existing one.
        """
        logging.info('begin the recovery of the status')
        app = self.app
        pt = app.pt
        app.changeClusterState(ClusterStates.RECOVERING)
        pt.clear()

        # Forces a first round of connection attempts to other masters.
        # NOTE(review): presumably set back to True by handlers outside
        # this method to trigger new rounds - confirm.
        self.try_secondary = True

        # collect the last partition table available
        poll = app.em.poll
        while 1:
            if self.try_secondary:
                # Keep trying to connect to all other known masters,
                # to make sure there is a challenge between each pair
                # of masters in the cluster. If we win, all connections
                # opened here will be closed.
                self.try_secondary = False
                node_list = []
                for node in app.nm.getMasterList():
                    if not (node is app._node or node.isConnected(True)):
                        # During recovery, master nodes are not put back in
                        # DOWN state by handlers. This is done
                        # entirely in this method (here and after this poll
                        # loop), to minimize the notification packets.
                        if not node.isDown():
                            node.setDown()
                            node_list.append(node)
                        ClientConnection(app, app.election_handler, node)
                if node_list:
                    app.broadcastNodesInformation(node_list)
            poll(1)
            if pt.filled():
                # A partition table exists, we are starting an existing
                # cluster.
                node_list = pt.getOperationalNodeSet()
                if app._startup_allowed:
                    # Startup explicitly allowed: keep only the pending
                    # nodes of the operational set.
                    node_list = [
                        node for node in node_list if node.isPending()
                    ]
                elif node_list:
                    # we want all nodes to be there if we're going to truncate
                    if app.truncate_tid:
                        node_list = pt.getNodeSet()
                    # Keep polling until every required node is pending.
                    if not all(node.isPending() for node in node_list):
                        continue
            elif app._startup_allowed or app.autostart:
                # No partition table and admin allowed startup, we are
                # creating a new cluster out of all pending nodes.
                node_list = app.nm.getStorageList(only_identified=True)
                # With autostart only, wait until at least `app.autostart`
                # storage nodes are identified.
                if not app._startup_allowed and len(node_list) < app.autostart:
                    continue
            else:
                continue
            # Go on only with a non-empty selection whose connections
            # all report isPending() as false.
            if node_list and not any(node.getConnection().isPending()
                                     for node in node_list):
                if pt.filled():
                    if app.truncate_tid:
                        # Select identified nodes whose entry in
                        # truncate_dict is unset or greater than
                        # truncate_tid.
                        node_list = app.nm.getIdentifiedList(
                            pool_set={
                                uuid
                                for uuid, tid in
                                self.truncate_dict.iteritems()
                                if not tid or app.truncate_tid < tid
                            })
                        if node_list:
                            # Send them a Truncate packet and go back to
                            # polling instead of starting up now.
                            truncate = Packets.Truncate(app.truncate_tid)
                            for node in node_list:
                                conn = node.getConnection()
                                conn.send(truncate)
                                self.connectionCompleted(conn, False)
                            continue
                    node_list = pt.getConnectedNodeList()
                break

        logging.info('startup allowed')

        for node in node_list:
            assert node.isPending(), node
            node.setRunning()

        # Masters that are not identified are closed or marked down; the
        # ones newly marked down are added to the broadcast below.
        for node in app.nm.getMasterList():
            if not (node is app._node or node.isIdentified()):
                if node.isConnected(True):
                    node.getConnection().close()
                    assert node.isDown(), node
                elif not node.isDown():
                    assert self.try_secondary, node
                    node.setDown()
                    node_list.append(node)

        app.broadcastNodesInformation(node_list)

        if pt.getID() is None:
            logging.info('creating a new partition table')
            pt.make(node_list)
            self._notifyAdmins(
                Packets.SendPartitionTable(pt.getID(), pt.getRowList()))
        else:
            # Existing table: outdate cells and notify admins of the
            # resulting changes, if any.
            cell_list = pt.outdate()
            if cell_list:
                self._notifyAdmins(
                    Packets.NotifyPartitionChanges(pt.setNextID(), cell_list))
            if app.backup_tid:
                pt.setBackupTidDict(self.backup_tid_dict)
                app.backup_tid = pt.getBackupTid()

        logging.debug('cluster starts this partition table:')
        pt.log()
Beispiel #15
0
class NeoCTL(BaseApplication):
    """Control application sending commands to an admin node.

    Each public method sends a single packet over a lazily opened
    connection and blocks, polling the event manager, until an answer
    has been appended to ``response_queue``.
    NOTE(review): ``connected`` and ``response_queue`` are presumably
    updated by CommandEventHandler - confirm against the handler.
    """

    connection = None
    connected = False

    def __init__(self, address, **kw):
        """Register the admin node at `address`; no connection is opened."""
        super(NeoCTL, self).__init__(**kw)
        self.server = self.nm.createAdmin(address=address)
        self.handler = CommandEventHandler(self)
        self.response_queue = []

    def __getConnection(self):
        """Return the connection to the admin node, opening it if needed.

        Raises NotReadyException if the connection attempt fails.
        """
        if not self.connected:
            self.connection = ClientConnection(self, self.handler, self.server)
            # Never delay reconnection to master. This speeds up unit tests
            # and it should not change anything for normal use.
            try:
                self.connection.setReconnectionNoDelay()
            except ConnectionClosed:
                self.connection = None
            while not self.connected:
                if self.connection is None:
                    raise NotReadyException('not connected')
                self.em.poll(1)
        return self.connection

    def __ask(self, packet):
        """Send `packet` and block until its response is available.

        Returns the response popped from ``response_queue``. Raises
        NotReadyException when the connection is lost before a response
        arrives, or when the response is an Error packet with code
        ErrorCodes.NOT_READY.
        """
        # TODO: make thread-safe
        connection = self.__getConnection()
        connection.ask(packet)
        response_queue = self.response_queue
        assert len(response_queue) == 0
        while self.connected:
            self.em.poll(1)
            if response_queue:
                break
        else:
            # The loop ended because the connection was lost, not
            # because a response arrived.
            raise NotReadyException('Connection closed')
        response = response_queue.pop()
        if response[0] == Packets.Error and \
           response[1] == ErrorCodes.NOT_READY:
            raise NotReadyException(response[2])
        return response

    def __askAck(self, packet):
        """Send `packet`, require an ACK and return ``response[2]``.

        Raises RuntimeError(response) for any other response.
        """
        response = self.__ask(packet)
        if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
            raise RuntimeError(response)
        return response[2]

    def __askAnswer(self, packet, answer_type):
        """Send `packet` and return the response if it is `answer_type`.

        Raises RuntimeError(response) when ``response[0]`` differs.
        """
        response = self.__ask(packet)
        if response[0] != answer_type:
            raise RuntimeError(response)
        return response

    def enableStorageList(self, uuid_list):
        """
          Put all given storage nodes in "running" state.
        """
        return self.__askAck(Packets.AddPendingNodes(uuid_list))

    def tweakPartitionTable(self, uuid_list=()):
        """Send TweakPartitionTable(uuid_list); return the acknowledgement."""
        return self.__askAck(Packets.TweakPartitionTable(uuid_list))

    def setClusterState(self, state):
        """
          Set cluster state.
        """
        return self.__askAck(Packets.SetClusterState(state))

    def _setNodeState(self, node, state):
        """
          Kill node, or remove it permanently
        """
        return self.__askAck(Packets.SetNodeState(node, state))

    def getClusterState(self):
        """
          Get cluster state.
        """
        return self.__askAnswer(Packets.AskClusterState(),
                                Packets.AnswerClusterState)[1]

    def getLastIds(self):
        """Return the fields of the AnswerLastIDs response."""
        return self.__askAnswer(Packets.AskLastIDs(),
                                Packets.AnswerLastIDs)[1:]

    def getLastTransaction(self):
        """Return the first field of the AnswerLastTransaction response."""
        return self.__askAnswer(Packets.AskLastTransaction(),
                                Packets.AnswerLastTransaction)[1]

    def getRecovery(self):
        """Return the fields of the AnswerRecovery response."""
        return self.__askAnswer(Packets.AskRecovery(),
                                Packets.AnswerRecovery)[1:]

    def getNodeList(self, node_type=None):
        """
          Get a list of nodes, filtering with given type.
        """
        return self.__askAnswer(Packets.AskNodeList(node_type),
                                Packets.AnswerNodeList)[1] # node_list

    def getPartitionRowList(self, min_offset=0, max_offset=0, node=None):
        """
          Get a list of partition rows, bounded by min & max and involving
          given node.
        """
        return self.__askAnswer(
            Packets.AskPartitionList(min_offset, max_offset, node),
            Packets.AnswerPartitionList)[1:3] # ptid, row_list

    def startCluster(self):
        """
          Set cluster into "verifying" state.
        """
        return self.setClusterState(ClusterStates.VERIFYING)

    def killNode(self, node):
        """Ask to put `node` in UNKNOWN state."""
        return self._setNodeState(node, NodeStates.UNKNOWN)

    def dropNode(self, node):
        """Ask to put `node` in DOWN state."""
        return self._setNodeState(node, NodeStates.DOWN)

    def getPrimary(self):
        """
          Return the primary master UUID.
        """
        return self.__askAnswer(Packets.AskPrimary(),
                                Packets.AnswerPrimary)[1]

    def truncate(self, tid):
        """Send Truncate(tid) and return the acknowledgement."""
        return self.__askAck(Packets.Truncate(tid))

    def checkReplicas(self, *args):
        """Send CheckReplicas(*args) and return the acknowledgement."""
        return self.__askAck(Packets.CheckReplicas(*args))
Beispiel #16
0
class NeoCTL(BaseApplication):
    """
      Blocking client used to send control commands to a cluster through
      an admin node.

      Each public method sends a single request and polls the event manager
      until a response is available in `response_queue`. Instances are not
      thread-safe (see __ask).
    """

    # Class-level defaults. NOTE(review): both are presumably updated by
    # CommandEventHandler when the connection is established or lost;
    # confirm against the handler.
    connection = None
    connected = False

    def __init__(self, address, **kw):
        """
          `address` is the address of the admin node to talk to; remaining
          keyword arguments are forwarded to BaseApplication.
        """
        super(NeoCTL, self).__init__(**kw)
        self.server = self.nm.createAdmin(address=address)
        self.handler = CommandEventHandler(self)
        # Responses are appended here (outside this class) and consumed
        # by __ask.
        self.response_queue = []

    def __getConnection(self):
        """
          Return the connection to the admin node, opening it if needed.

          Polls the event manager until `connected` becomes true. Raises
          NotReadyException if `connection` is (or becomes) None before
          that happens.
        """
        if not self.connected:
            self.connection = ClientConnection(self, self.handler, self.server)
            # Never delay reconnection to master. This speeds up unit tests
            # and it should not change anything for normal use.
            try:
                self.connection.setReconnectionNoDelay()
            except ConnectionClosed:
                self.connection = None
            while not self.connected:
                if self.connection is None:
                    raise NotReadyException('not connected')
                self.em.poll(1)
        return self.connection

    def __ask(self, packet):
        """
          Send `packet` and block until a response is received.

          Returns the raw response, an indexable whose first item is the
          packet type. Raises NotReadyException if the connection is closed
          before a response arrives, or if the peer answers with a
          NOT_READY error.
        """
        # TODO: make thread-safe
        connection = self.__getConnection()
        connection.ask(packet)
        response_queue = self.response_queue
        assert not response_queue
        while self.connected:
            self.em.poll(1)
            if response_queue:
                break
        else:
            # Loop ended because the connection was lost, not because a
            # response arrived.
            raise NotReadyException('Connection closed')
        response = response_queue.pop()
        if response[0] == Packets.Error and \
           response[1] == ErrorCodes.NOT_READY:
            raise NotReadyException(response[2])
        return response

    def __askAck(self, packet):
        """
          Send `packet`, require an ACK and return the field that follows
          the ACK code.

          Raises RuntimeError, carrying the whole response, for any other
          response.
        """
        response = self.__ask(packet)
        if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
            raise RuntimeError(response)
        return response[2]

    def __askAnswer(self, packet, answer_type):
        """
          Send `packet`, require a response of type `answer_type` and
          return the whole response.

          Raises RuntimeError, carrying the whole response, otherwise.
        """
        response = self.__ask(packet)
        if response[0] != answer_type:
            raise RuntimeError(response)
        return response

    def enableStorageList(self, uuid_list):
        """
          Put all given storage nodes in "running" state.
        """
        return self.__askAck(Packets.AddPendingNodes(uuid_list))

    def tweakPartitionTable(self, uuid_list=()):
        """
          Send a TweakPartitionTable request for `uuid_list` and return the
          content of its acknowledgement.
        """
        return self.__askAck(Packets.TweakPartitionTable(uuid_list))

    def setClusterState(self, state):
        """
          Set cluster state.
        """
        return self.__askAck(Packets.SetClusterState(state))

    def _setNodeState(self, node, state):
        """
          Kill node, or remove it permanently
        """
        return self.__askAck(Packets.SetNodeState(node, state))

    def getClusterState(self):
        """
          Get cluster state.
        """
        return self.__askAnswer(Packets.AskClusterState(),
                                Packets.AnswerClusterState)[1]

    def getLastIds(self):
        """
          Return every field of the AnswerLastIDs response.
        """
        return self.__askAnswer(Packets.AskLastIDs(),
                                Packets.AnswerLastIDs)[1:]

    def getLastTransaction(self):
        """
          Return the first field of the AnswerLastTransaction response.
        """
        return self.__askAnswer(Packets.AskLastTransaction(),
                                Packets.AnswerLastTransaction)[1]

    def getRecovery(self):
        """
          Return every field of the AnswerRecovery response.
        """
        return self.__askAnswer(Packets.AskRecovery(),
                                Packets.AnswerRecovery)[1:]

    def getNodeList(self, node_type=None):
        """
          Get a list of nodes, filtering with given type.
        """
        return self.__askAnswer(Packets.AskNodeList(node_type),
                                Packets.AnswerNodeList)[1]  # node_list

    def getPartitionRowList(self, min_offset=0, max_offset=0, node=None):
        """
          Get a list of partition rows, bounded by min & max and involving
          given node.
        """
        packet = Packets.AskPartitionList(min_offset, max_offset, node)
        response = self.__askAnswer(packet, Packets.AnswerPartitionList)
        return response[1:3]  # ptid, row_list

    def startCluster(self):
        """
          Set cluster into "verifying" state.
        """
        return self.setClusterState(ClusterStates.VERIFYING)

    def killNode(self, node):
        """
          Ask for `node` to be put in the UNKNOWN node state.
        """
        return self._setNodeState(node, NodeStates.UNKNOWN)

    def dropNode(self, node):
        """
          Ask for `node` to be put in the DOWN node state.
        """
        return self._setNodeState(node, NodeStates.DOWN)

    def getPrimary(self):
        """
          Return the primary master UUID.
        """
        return self.__askAnswer(Packets.AskPrimary(),
                                Packets.AnswerPrimary)[1]

    def truncate(self, tid):
        """
          Send a Truncate request for `tid` and return the content of its
          acknowledgement.
        """
        return self.__askAck(Packets.Truncate(tid))

    def checkReplicas(self, *args):
        """
          Send a CheckReplicas request built from `args` and return the
          content of its acknowledgement.
        """
        return self.__askAck(Packets.CheckReplicas(*args))
Beispiel #17
0
    def playSecondaryRole(self):
        """
        A master plays the secondary role when it is unlikely to win the
        election (it lost against another master during identification
        or it was notified that another is the primary master).
        Its only task is to try again to become the primary master when the
        latter fails. When connected to the cluster, the only communication is
        with the primary master, to stay informed about removed/added master
        nodes, and exit if requested.

        The method returns (by leaving its main loop) once every known
        master address has been tried without finding a running primary.
        """
        logging.info('play the secondary role with %r', self.listening_conn)
        self.primary = None
        handler = master.PrimaryHandler(self)
        # The connection to the probably-primary master can be in any state
        # depending on how we were informed. The only case in which it can not
        # be reused is when we have pending requests.
        if self.primary_master.isConnected(True):
            master_conn = self.primary_master.getConnection()
            # When we find the primary during identification, we don't attach
            # the connection (a server one) to any node, and it will be closed
            # in the below 'for' loop.
            assert master_conn.isClient(), master_conn
            try:
                # We want the handler to be effective immediately.
                # If it's not possible, let's just reconnect.
                if not master_conn.setHandler(handler):
                    # NOTE(review): close() is expected to raise
                    # PrimaryFailure here (hence the 'assert False');
                    # confirm against the connection/handler code.
                    master_conn.close()
                    assert False
            except PrimaryFailure:
                master_conn = None
        else:
            master_conn = None
        # Keep only the listening connection(s) and, if reusable, the
        # connection to the probable primary; close everything else.
        for conn in self.em.getConnectionList():
            if conn.isListening():
                conn.setHandler(
                    identification.SecondaryIdentificationHandler(self))
            elif conn is not master_conn:
                conn.close()

        # Addresses that must not be tried (again) as primary.
        failed = {self.server}
        poll = self.em.poll
        while True:
            try:
                if master_conn is None:
                    for node in self.nm.getMasterList():
                        node.setDown()
                    node = self.primary_master
                    failed.add(node.getAddress())
                    if not node.isConnected(True):
                        # On immediate connection failure,
                        # PrimaryFailure is raised.
                        ClientConnection(self, handler, node)
                else:
                    # The reused connection only counts for the first pass:
                    # later iterations must go through the branch above.
                    master_conn = None
                # This loop is only left through an exception.
                while True:
                    poll(1)
            except PrimaryFailure:
                if self.primary_master.isRunning():
                    # XXX: What's the best to do here ? Another option is to
                    #      choose the RUNNING master node with the lowest
                    #      election key (i.e. (id_timestamp, address) as
                    #      defined in IdentificationHandler), and return if we
                    #      have the lowest one.
                    failed = {self.server}
                else:
                    # Since the last primary failure (or since we play the
                    # secondary role), do not try any node more than once.
                    # The loop variable is the attribute itself, so the
                    # selected candidate is stored as it is found.
                    for self.primary_master in self.nm.getMasterList():
                        if self.primary_master.getAddress() not in failed:
                            break
                    else:
                        # All known master nodes are either down or secondary.
                        # Let's play the primary role again.
                        break
            except PrimaryElected as e:
                node = self.primary_master
                # The exception carries exactly one argument, which becomes
                # the new primary master candidate.
                self.primary_master, = e.args
                assert node is not self.primary_master, node
                # Drop the connection to the previous candidate; a
                # PrimaryFailure raised while closing it is irrelevant now.
                try:
                    node.getConnection().close()
                except PrimaryFailure:
                    pass