Exemple #1
0
 def addPendingNodes(self, conn, uuid_list):
     """Promote the requested pending storage nodes to running.

     Only allowed while the cluster is RUNNING, STARTING_BACKUP or
     BACKINGUP; any other state raises ProtocolError.  Pending storage
     nodes whose UUID is in ``uuid_list`` are handed to the partition
     table's ``addNodeList``; every node it yields is marked running and
     notified with a StartOperation packet, then the node information is
     broadcast.  ``conn`` always receives an Ack describing the outcome.
     """
     logging.debug('Add nodes %s', ', '.join(map(uuid_str, uuid_list)))
     app = self.app
     cluster_state = app.getClusterState()
     # XXX: Would it be safe to allow more states ?
     allowed_states = (ClusterStates.RUNNING, ClusterStates.STARTING_BACKUP,
                       ClusterStates.BACKINGUP)
     if cluster_state not in allowed_states:
         raise ProtocolError(
             'Can not add nodes in %s state' % cluster_state)
     # take all pending nodes
     candidates = (storage for storage in app.nm.getStorageList()
                   if storage.isPending()
                   and storage.getUUID() in uuid_list)
     added = list(app.pt.addNodeList(candidates))
     if not added:
         logging.warning('No node added')
         conn.answer(Errors.Ack('No node added'))
         return
     # the same packet instance is sent to every added node
     start_packet = Packets.StartOperation(bool(app.backup_tid))
     for storage in added:
         storage.setRunning()
         storage.notify(start_packet)
     app.broadcastNodesInformation(added)
     added_uuids = ', '.join(uuid_str(storage.getUUID())
                             for storage in added)
     conn.answer(Errors.Ack('Nodes added: %s' % added_uuids))
Exemple #2
0
    def setClusterState(self, conn, state):
        """Handle a request to switch the cluster to ``state``.

        The request is rejected with ProtocolError when the current
        cluster state is not listed in ``CLUSTER_STATE_WORKFLOW[state]``,
        or when ``state`` has no workflow entry and is not STOPPING.

        For VERIFYING, the requested state is not applied directly:
        ``app._startup_allowed`` is set instead, provided that at least
        one identified storage node exists and none of their connections
        is still pending.  For STARTING_BACKUP, there must be no pending
        transaction and no client reported by ``getClientList(True)``.

        An Ack is sent on ``conn`` and then, if the resulting state
        differs from the current one, StateChangedException is raised.
        """
        app = self.app
        # check request
        try:
            allowed_from = CLUSTER_STATE_WORKFLOW[state]
        except KeyError:
            # no workflow entry for this state: only STOPPING is accepted
            if state != ClusterStates.STOPPING:
                raise ProtocolError('Invalid state requested')
        else:
            if app.cluster_state not in allowed_from:
                raise ProtocolError('Can not switch to this state')

        # change state
        if state == ClusterStates.VERIFYING:
            identified = app.nm.getStorageList(only_identified=True)
            if not identified:
                raise ProtocolError(
                    'Cannot exit recovery without any storage node')
            for storage in identified:
                assert storage.isPending(), storage
                if storage.getConnection().isPending():
                    # XXX: It's wrong to use ProtocolError here. We must reply
                    #      less aggressively because the admin has no way to
                    #      know that there's still pending activity.
                    raise ProtocolError(
                        'Cannot exit recovery now: node %r is entering cluster'
                        % (storage, ))
            app._startup_allowed = True
            # keep the current state so that no state change is raised below
            state = app.cluster_state
        elif state == ClusterStates.STARTING_BACKUP:
            busy = app.tm.hasPending() or app.nm.getClientList(True)
            if busy:
                raise ProtocolError(
                    "Can not switch to %s state with pending"
                    " transactions or connected clients" % state)

        conn.answer(Errors.Ack('Cluster state changed'))
        if state != app.cluster_state:
            raise StateChangedException(state)
Exemple #3
0
    def setClusterState(self, conn, state):
        """Handle a request to switch the cluster to ``state``.

        The request is refused with AnswerDenied when the current cluster
        state is not listed in ``CLUSTER_STATE_WORKFLOW[state]``, or when
        ``state`` has no workflow entry and is not STOPPING.

        For VERIFYING, the requested state is not applied directly:
        ``app._startup_allowed`` is set instead, provided that at least
        one identified storage node exists and none of their connections
        is still pending.  For STARTING_BACKUP, there must be no pending
        transaction and no client reported by ``getClientList(True)``.

        An Ack is sent on ``conn`` and then, if the resulting state
        differs from the current one, StateChangedException is raised.
        """
        app = self.app
        # check request
        try:
            if app.cluster_state not in CLUSTER_STATE_WORKFLOW[state]:
                raise AnswerDenied('Can not switch to this state')
        except KeyError:
            # no workflow entry for this state: only STOPPING is accepted
            if state != ClusterStates.STOPPING:
                raise AnswerDenied('Invalid state requested')

        # change state
        if state == ClusterStates.VERIFYING:
            storage_list = app.nm.getStorageList(only_identified=True)
            if not storage_list:
                raise AnswerDenied(
                    'Cannot exit recovery without any storage node')
            for node in storage_list:
                assert node.isPending(), node
                if node.getConnection().isPending():
                    # Wrap the node in a 1-tuple so that it is always taken
                    # as the single value for %r, whatever its type.
                    raise AnswerDenied(
                        'Cannot exit recovery now: node %r is entering cluster'
                        % (node, ))
            app._startup_allowed = True
            # keep the current state so that no state change is raised below
            state = app.cluster_state
        elif state == ClusterStates.STARTING_BACKUP:
            if app.tm.hasPending() or app.nm.getClientList(True):
                raise AnswerDenied("Can not switch to %s state with pending"
                                   " transactions or connected clients" %
                                   state)

        conn.answer(Errors.Ack('Cluster state changed'))
        if state != app.cluster_state:
            raise StateChangedException(state)
Exemple #4
0
 def tweakPartitionTable(self, conn, uuid_list):
     """Tweak the partition table and broadcast the resulting changes.

     Only allowed while the cluster is RUNNING, STARTING_BACKUP or
     BACKINGUP; any other state raises ProtocolError.  Each UUID of
     ``uuid_list`` is resolved with the node manager's ``getByUUID`` and
     the results are passed to the partition table's ``tweak``.  An empty
     Ack is sent on ``conn`` afterwards.
     """
     app = self.app
     cluster_state = app.getClusterState()
     # XXX: Would it be safe to allow more states ?
     allowed_states = (ClusterStates.RUNNING, ClusterStates.STARTING_BACKUP,
                       ClusterStates.BACKINGUP)
     if cluster_state not in allowed_states:
         raise ProtocolError(
             'Can not tweak partition table in %s state' % cluster_state)
     nodes = map(app.nm.getByUUID, uuid_list)
     changes = app.pt.tweak(nodes)
     app.broadcastPartitionChanges(changes)
     conn.answer(Errors.Ack(''))
Exemple #5
0
 def repair(self, conn, uuid_list, *args):
     """Send a NotifyRepair packet to the given storage nodes.

     Every UUID of ``uuid_list`` must resolve to an identified storage
     node, otherwise ProtocolError is raised before anything is sent.
     ``args`` are forwarded unchanged to ``Packets.NotifyRepair``.  An
     empty Ack is sent on ``conn`` once all nodes have been notified.
     """
     lookup = self.app.nm.getByUUID
     targets = []
     # validate the whole list first so that a bad UUID sends nothing
     for uuid in uuid_list:
         target = lookup(uuid)
         if target is None or not target.isStorage() \
                 or not target.isIdentified():
             raise ProtocolError("invalid storage node %s" % uuid_str(uuid))
         targets.append(target)
     packet = Packets.NotifyRepair(*args)
     for target in targets:
         target.send(packet)
     conn.answer(Errors.Ack(''))
Exemple #6
0
 def addPendingNodes(self, conn, uuid_list):
     """Promote the requested pending storage nodes to running.

     Pending storage nodes whose UUID is in ``uuid_list`` are handed to
     the partition table's ``addNodeList``; every node it yields is
     marked running and passed to ``app.startStorage``, then the node
     information is broadcast.  ``conn`` always receives an Ack
     describing the outcome.
     """
     logging.debug('Add nodes %s', ', '.join(map(uuid_str, uuid_list)))
     app = self.app
     # take all pending nodes
     candidates = (storage for storage in app.nm.getStorageList()
                   if storage.isPending()
                   and storage.getUUID() in uuid_list)
     added = list(app.pt.addNodeList(candidates))
     if not added:
         logging.warning('No node added')
         conn.answer(Errors.Ack('No node added'))
         return
     for storage in added:
         storage.setRunning()
         app.startStorage(storage)
     app.broadcastNodesInformation(added)
     added_uuids = ', '.join(uuid_str(storage.getUUID())
                             for storage in added)
     conn.answer(Errors.Ack('Nodes added: %s' % added_uuids))
Exemple #7
0
 def tweakPartitionTable(self, conn, uuid_list):
     """Tweak the partition table and broadcast the resulting changes.

     Only allowed while the cluster is RUNNING, STARTING_BACKUP or
     BACKINGUP; any other state raises ProtocolError.  The storage nodes
     passed to the partition table's ``tweak`` are those whose UUID is in
     ``uuid_list`` plus every storage node that is not running.  An empty
     Ack is sent on ``conn`` afterwards.
     """
     app = self.app
     cluster_state = app.getClusterState()
     # XXX: Would it be safe to allow more states ?
     allowed_states = (ClusterStates.RUNNING,
                       ClusterStates.STARTING_BACKUP,
                       ClusterStates.BACKINGUP)
     if cluster_state not in allowed_states:
         raise ProtocolError(
             'Can not tweak partition table in %s state' % cluster_state)
     selected = []
     for storage in app.nm.getStorageList():
         if storage.getUUID() in uuid_list or not storage.isRunning():
             selected.append(storage)
     changes = app.pt.tweak(selected)
     app.broadcastPartitionChanges(changes)
     conn.answer(Errors.Ack(''))
Exemple #8
0
 def checkReplicas(self, conn, partition_dict, min_tid, max_tid):
     """Ask storage nodes to check partitions over a range of TIDs.

     ``partition_dict`` maps a partition offset to a source: either a
     node UUID (resolved with the node manager's ``getByUUID``) or a
     false value when no source is imposed.

     If ``max_tid`` is false, it defaults to
     ``pt.getCheckTid(partition_dict)`` when ``app.backup_tid`` is set,
     and to ``app.getLastTransaction()`` otherwise.  When ``min_tid`` is
     greater than ``max_tid``, a warning is logged and no node is
     notified.  Otherwise, one CheckPartition packet per eligible
     partition is sent to a selected node.

     An empty Ack is sent on ``conn`` in every case.
     """
     app = self.app
     pt = app.pt
     backingup = bool(app.backup_tid)
     if not max_tid:
         max_tid = pt.getCheckTid(partition_dict) if backingup else \
             app.getLastTransaction()
     if min_tid > max_tid:
         logging.warning("nothing to check: min_tid=%s > max_tid=%s",
                         dump(min_tid), dump(max_tid))
     else:
         getByUUID = app.nm.getByUUID
         # nodes already chosen to drive a check for an earlier partition
         node_set = set()
         for offset, source in partition_dict.iteritems():
             # XXX: For the moment, code checking replicas is unable to fix
             #      corrupted partitions (when a good cell is known)
             #      so only check readable ones.
             #      (see also Checker._nextPartition of storage)
             cell_list = pt.getCellList(offset, True)
             #cell_list = [cell for cell in pt.getCellList(offset)
             #                  if not cell.isOutOfDate()]
             # Skip the partition unless there are at least 2 participants.
             # The boolean term counts as 1 when backing up without an
             # imposed source (presumably standing for the upstream
             # cluster used as source below -- to be confirmed).
             if len(cell_list) + (backingup and not source) <= 1:
                 continue
             # Prefer a node that was already selected for another
             # partition.  If there is none, the loop ends without break,
             # leaving `node` bound to the node of the last cell, which is
             # then recorded as selected.
             for cell in cell_list:
                 node = cell.getNode()
                 if node in node_set:
                     break
             else:
                 node_set.add(node)
             # From here, `source` is rebound to the 2-tuple that is put
             # in the packet: a name and an address.
             if source:
                 source = '', getByUUID(source).getAddress()
             else:
                 readable = [
                     cell for cell in cell_list if cell.isReadable()
                 ]
                 # chained comparison: exactly one readable cell, and at
                 # least one other cell that is not readable
                 if 1 == len(readable) < len(cell_list):
                     source = '', readable[0].getAddress()
                 elif backingup:
                     # pick at random a cell of the same partition in the
                     # partition table of app.backup_app
                     source = app.backup_app.name, random.choice(
                         app.backup_app.pt.getCellList(
                             offset, readable=True)).getAddress()
                 else:
                     # no address given in this case
                     source = '', None
             node.getConnection().notify(
                 Packets.CheckPartition(offset, source, min_tid, max_tid))
     conn.answer(Errors.Ack(''))
Exemple #9
0
 def truncate(self, conn, tid):
     """Acknowledge a truncate request and raise StoppedOperation(tid).

     Raises ProtocolError, without answering, unless the cluster state
     is RUNNING.  Otherwise an empty Ack is sent on ``conn`` before
     StoppedOperation is raised with ``tid``.
     """
     if self.app.cluster_state != ClusterStates.RUNNING:
         raise ProtocolError('Can not truncate in this state')
     ack = Errors.Ack('')
     conn.answer(ack)
     # answer first: the exception below interrupts this handler
     raise StoppedOperation(tid)
Exemple #10
0
            if node.isConnected():
                # notify itself so it can shutdown
                node.send(Packets.NotifyNodeInformation(
                    monotonic_time(), [node.asTuple()]))
                # close to avoid handle the closure as a connection lost
                node.getConnection().abort()
            if keep:
                cell_list = app.pt.outdate()
            elif cell_list:
                message = 'node permanently removed'
            app.broadcastPartitionChanges(cell_list)
        else:
            node.setState(state)

        # /!\ send the node information *after* the partition table change
        conn.answer(Errors.Ack(message))
        if state_changed:
            # notify node explicitly because broadcastNodesInformation()
            # ignores non-running nodes
            assert not node.isRunning()
            if node.isConnected():
                node.send(Packets.NotifyNodeInformation(
                    monotonic_time(), [node.asTuple()]))
            app.broadcastNodesInformation([node])

    def addPendingNodes(self, conn, uuid_list):
        uuids = ', '.join(map(uuid_str, uuid_list))
        logging.debug('Add nodes %s', uuids)
        app = self.app
        state = app.getClusterState()
        # XXX: Would it be safe to allow more states ?
Exemple #11
0
 def truncate(self, conn, tid):
     """Acknowledge a truncate request and raise StoppedOperation(tid).

     An empty Ack is sent on ``conn`` before StoppedOperation is raised
     with ``tid``.
     """
     ack = Errors.Ack('')
     conn.answer(ack)
     # answer first: the exception below interrupts this handler
     raise StoppedOperation(tid)
Exemple #12
0
 def setNumReplicas(self, conn, num_replicas):
     """Broadcast a new number of replicas, then acknowledge.

     ``app.broadcastPartitionChanges`` is called with an empty tuple of
     changes and ``num_replicas``; an empty Ack is then sent on ``conn``.
     """
     no_changes = ()
     self.app.broadcastPartitionChanges(no_changes, num_replicas)
     ack = Errors.Ack('')
     conn.answer(ack)