Exemple #1
0
 def addPendingNodes(self, conn, uuid_list):
     """Switch the requested pending storage nodes to running.

     Only accepted while the cluster state is RUNNING, STARTING_BACKUP or
     BACKINGUP; any other state raises ProtocolError. Pending storage nodes
     whose UUID is in ``uuid_list`` are passed to ``app.pt.addNodeList()``.
     Each node it returns is marked running and notified with a
     StartOperation packet, then the whole list is passed to
     ``app.broadcastNodesInformation()``. ``conn`` is always answered with
     an Ack describing the outcome.
     """
     logging.debug('Add nodes %s', ', '.join(map(uuid_str, uuid_list)))
     app = self.app
     cluster_state = app.getClusterState()
     # XXX: Would it be safe to allow more states ?
     accepted_states = (ClusterStates.RUNNING,
                        ClusterStates.STARTING_BACKUP,
                        ClusterStates.BACKINGUP)
     if cluster_state not in accepted_states:
         raise ProtocolError('Can not add nodes in %s state' % cluster_state)
     # take all pending nodes
     candidates = (node for node in app.nm.getStorageList()
                   if node.isPending() and node.getUUID() in uuid_list)
     added = list(app.pt.addNodeList(candidates))
     if not added:
         logging.warning('No node added')
         conn.answer(Errors.Ack('No node added'))
         return
     start_packet = Packets.StartOperation(bool(app.backup_tid))
     for node in added:
         node.setRunning()
         node.notify(start_packet)
     app.broadcastNodesInformation(added)
     added_names = ', '.join(uuid_str(node.getUUID()) for node in added)
     conn.answer(Errors.Ack('Nodes added: %s' % added_names))
Exemple #2
0
    def setClusterState(self, conn, state):
        """Validate and acknowledge a request to switch the cluster state.

        Raises AnswerDenied when the requested transition is not listed in
        CLUSTER_STATE_WORKFLOW (STOPPING is accepted even without an entry),
        when VERIFYING is requested without any identified storage node or
        while one of them still has a pending connection, or when
        STARTING_BACKUP is requested with pending transactions or connected
        clients.

        The Ack is sent to ``conn`` first; StateChangedException is then
        raised if the resulting state differs from the current one.
        """
        app = self.app
        current_state = app.cluster_state
        # check request
        try:
            allowed_origins = CLUSTER_STATE_WORKFLOW[state]
        except KeyError:
            # No workflow entry: only STOPPING is tolerated in that case.
            if state != ClusterStates.STOPPING:
                raise AnswerDenied('Invalid state requested')
        else:
            if current_state not in allowed_origins:
                raise AnswerDenied('Can not switch to this state')

        # change state
        if state == ClusterStates.VERIFYING:
            identified_storages = app.nm.getStorageList(only_identified=True)
            if not identified_storages:
                raise AnswerDenied(
                    'Cannot exit recovery without any storage node')
            for storage in identified_storages:
                assert storage.isPending(), storage
                if storage.getConnection().isPending():
                    raise AnswerDenied(
                        'Cannot exit recovery now: node %r is entering cluster'
                        % storage)
            app._startup_allowed = True
            # Rebinding to the current state makes the comparison below
            # find no change, so no StateChangedException is raised here.
            state = app.cluster_state
        elif state == ClusterStates.STARTING_BACKUP:
            busy = app.tm.hasPending() or app.nm.getClientList(True)
            if busy:
                raise AnswerDenied(
                    "Can not switch to %s state with pending"
                    " transactions or connected clients" % state)

        conn.answer(Errors.Ack('Cluster state changed'))
        if state != app.cluster_state:
            raise StateChangedException(state)
Exemple #3
0
 def push():
     """Generator sending AddObject packets, then AnswerFetchObjects.

     For every (serial, oid) of ``object_list``, an entry already present
     in ``object_dict`` is removed from it and nothing is sent; otherwise
     the object is fetched from ``dm`` and sent. A falsy fetch result ends
     the generator after sending a ReplicationError. It yields
     ``conn.buffering`` after each object and a bare value after the final
     answer. ReferenceError and ConnectionClosed are silently ignored.
     """
     try:
         pack_tid = None  # TODO
         for serial, oid in object_list:
             known_oids = object_dict.get(serial)
             if known_oids:
                 # A tuple is replaced once by a set so it can be mutated.
                 if type(known_oids) is tuple:
                     known_oids = set(known_oids)
                     object_dict[serial] = known_oids
                 if oid in known_oids:
                     known_oids.remove(oid)
                     # Drop the serial once nothing is left under it.
                     if not known_oids:
                         del object_dict[serial]
                     continue
             record = dm.fetchObject(oid, serial)
             if not record:
                 error = Errors.ReplicationError(
                     "partition %u dropped or truncated" % partition)
                 conn.send(error, msg_id)
                 return
             if not record[2]:  # creation undone
                 record = record[0], 0, ZERO_HASH, '', record[4]
             # Same as in askFetchTransactions.
             conn.send(Packets.AddObject(oid, *record), msg_id)
             yield conn.buffering
         answer = Packets.AnswerFetchObjects(pack_tid, next_tid, next_oid,
                                             object_dict)
         conn.send(answer, msg_id)
         yield
     except (weakref.ReferenceError, ConnectionClosed):
         pass
Exemple #4
0
    def setClusterState(self, conn, state):
        """Check a cluster state change request and acknowledge it.

        ProtocolError is raised when the transition is not allowed by
        CLUSTER_STATE_WORKFLOW (STOPPING needs no entry), when VERIFYING is
        requested with no identified storage node or while one of them has
        a pending connection, and when STARTING_BACKUP is requested with
        pending transactions or connected clients.

        ``conn`` gets an Ack before StateChangedException is raised for an
        effective change of state.
        """
        app = self.app
        current_state = app.cluster_state
        # check request
        try:
            allowed_origins = CLUSTER_STATE_WORKFLOW[state]
        except KeyError:
            # Unknown target: STOPPING is the only one accepted anyway.
            if state != ClusterStates.STOPPING:
                raise ProtocolError('Invalid state requested')
        else:
            if current_state not in allowed_origins:
                raise ProtocolError('Can not switch to this state')

        # change state
        if state == ClusterStates.VERIFYING:
            identified_storages = app.nm.getStorageList(only_identified=True)
            if not identified_storages:
                raise ProtocolError(
                    'Cannot exit recovery without any storage node')
            for storage in identified_storages:
                assert storage.isPending(), storage
                if storage.getConnection().isPending():
                    # XXX: It's wrong to use ProtocolError here. We must reply
                    #      less aggressively because the admin has no way to
                    #      know that there's still pending activity.
                    raise ProtocolError(
                        'Cannot exit recovery now: node %r is '
                        'entering cluster' % (storage, ))
            app._startup_allowed = True
            # With the state rebound to the current one, the comparison
            # below does not raise StateChangedException.
            state = app.cluster_state
        elif state == ClusterStates.STARTING_BACKUP:
            busy = app.tm.hasPending() or app.nm.getClientList(True)
            if busy:
                raise ProtocolError(
                    "Can not switch to %s state with pending"
                    " transactions or connected clients" % state)

        conn.answer(Errors.Ack('Cluster state changed'))
        if state != app.cluster_state:
            raise StateChangedException(state)
Exemple #5
0
 def push():
     """Generator sending AddTransaction packets, then the final answer.

     Each tid of ``tid_list`` found in ``peer_tid_set`` is removed from
     that set and skipped; every other one is read from ``dm`` and sent.
     A missing transaction sends a ReplicationError and stops. The
     generator yields after every packet sent. ReferenceError and
     ConnectionClosed are silently ignored.
     """
     try:
         pack_tid = None  # TODO
         for tid in tid_list:
             if tid in peer_tid_set:
                 peer_tid_set.remove(tid)
                 continue
             txn = dm.getTransaction(tid)
             if txn is None:
                 error = Errors.ReplicationError(
                     "partition %u dropped" % partition)
                 conn.send(error, msg_id)
                 return
             oid_list, user, desc, ext, packed, ttid = txn
             packet = Packets.AddTransaction(tid, user, desc, ext,
                                             packed, ttid, oid_list)
             conn.send(packet, msg_id)
             yield
         answer = Packets.AnswerFetchTransactions(pack_tid, next_tid,
                                                  peer_tid_set)
         conn.send(answer, msg_id)
         yield
     except (weakref.ReferenceError, ConnectionClosed):
         pass
Exemple #6
0
 def push():
     """Generator sending AddObject packets, then AnswerFetchObjects.

     A (serial, oid) pair already listed in ``object_dict`` is removed
     from it and not sent; any other pair is read with ``dm.getObject()``
     and sent. A falsy result sends a ReplicationError and stops. The
     generator yields after every packet sent. ReferenceError and
     ConnectionClosed are silently ignored.
     """
     try:
         pack_tid = None  # TODO
         for serial, oid in object_list:
             known_oids = object_dict.get(serial)
             if known_oids:
                 # A list is replaced once by a set before being mutated.
                 if type(known_oids) is list:
                     known_oids = set(known_oids)
                     object_dict[serial] = known_oids
                 if oid in known_oids:
                     known_oids.remove(oid)
                     # Drop the serial once nothing is left under it.
                     if not known_oids:
                         del object_dict[serial]
                     continue
             record = dm.getObject(oid, serial)
             if not record:
                 error = Errors.ReplicationError(
                     "partition %u dropped or truncated" % partition)
                 conn.send(error, msg_id)
                 return
             # Only the fields from index 2 onwards are forwarded.
             packet = Packets.AddObject(oid, serial, *record[2:])
             conn.send(packet, msg_id)
             yield
         answer = Packets.AnswerFetchObjects(pack_tid, next_tid, next_oid,
                                             object_dict)
         conn.send(answer, msg_id)
         yield
     except (weakref.ReferenceError, ConnectionClosed):
         pass
Exemple #7
0
 def push():
     """Generator sending AddTransaction packets, then the final answer.

     Each tid of ``tid_list`` found in ``peer_tid_set`` is removed from
     that set and skipped; every other one is read from ``dm`` and sent,
     after which ``conn.buffering`` is yielded. A missing transaction
     sends a ReplicationError and stops. ReferenceError and
     ConnectionClosed are silently ignored.
     """
     try:
         pack_tid = None  # TODO
         for tid in tid_list:
             if tid in peer_tid_set:
                 peer_tid_set.remove(tid)
                 continue
             txn = dm.getTransaction(tid)
             if txn is None:
                 error = Errors.ReplicationError(
                     "partition %u dropped" % partition)
                 conn.send(error, msg_id)
                 return
             oid_list, user, desc, ext, packed, ttid = txn
             # Sending such packet does not mark the connection
             # for writing if there's too little data in the buffer.
             packet = Packets.AddTransaction(tid, user, desc, ext,
                                             bool(packed), ttid, oid_list)
             conn.send(packet, msg_id)
             # To avoid delaying several connections simultaneously,
             # and also prevent the backend from scanning different
             # parts of the DB at the same time, we ask the
             # scheduler not to switch to another background task.
             # Ideally, we are filling a buffer while the kernel
             # is flushing another one for a concurrent connection.
             yield conn.buffering
         answer = Packets.AnswerFetchTransactions(pack_tid, next_tid,
                                                  peer_tid_set)
         conn.send(answer, msg_id)
         yield
     except (weakref.ReferenceError, ConnectionClosed):
         pass
Exemple #8
0
 def askTransactionInformation(self, conn, tid):
     """Answer ``conn`` with what ``dm.getTransaction(tid)`` returned.

     A None result is reported as a TidNotFound error.
     """
     txn = self.app.dm.getTransaction(tid)
     if txn is None:
         conn.answer(Errors.TidNotFound('%s does not exist' % dump(tid)))
         return
     # Fields 1 to 4 are forwarded first (the 4th coerced to bool),
     # field 0 comes last.
     conn.answer(
         Packets.AnswerTransactionInformation(tid, txn[1], txn[2], txn[3],
                                              bool(txn[4]), txn[0]))
Exemple #9
0
 def askTransactionInformation(self, conn, tid):
     """Answer like the parent handler, but only up to the backup TID.

     A ``tid`` greater than ``dm.getBackupTID()`` is answered with a
     TidNotFound error instead of being looked up.
     """
     fetched_up_to = self.app.dm.getBackupTID()
     if tid > fetched_up_to:
         error = Errors.TidNotFound('tids > %s are not fully fetched yet' %
                                    dump(fetched_up_to))
         conn.answer(error)
     else:
         parent = super(ClientReadOnlyOperationHandler, self)
         parent.askTransactionInformation(conn, tid)
Exemple #10
0
 def tweakPartitionTable(self, conn, uuid_list):
     """Tweak the partition table and broadcast the resulting changes.

     Only accepted while the cluster state is RUNNING, STARTING_BACKUP or
     BACKINGUP; any other state raises ProtocolError. The nodes looked up
     from ``uuid_list`` are handed to ``app.pt.tweak()``, whose result is
     broadcast before ``conn`` receives an empty Ack.
     """
     app = self.app
     cluster_state = app.getClusterState()
     # XXX: Would it be safe to allow more states ?
     accepted_states = (ClusterStates.RUNNING,
                        ClusterStates.STARTING_BACKUP,
                        ClusterStates.BACKINGUP)
     if cluster_state not in accepted_states:
         raise ProtocolError('Can not tweak partition table in %s state' %
                             cluster_state)
     nodes = map(app.nm.getByUUID, uuid_list)
     changes = app.pt.tweak(nodes)
     app.broadcastPartitionChanges(changes)
     conn.answer(Errors.Ack(''))
Exemple #11
0
 def addPendingNodes(self, conn, uuid_list):
     """Switch the requested pending storage nodes to running.

     Pending storage nodes whose UUID is in ``uuid_list`` are passed to
     ``app.pt.addNodeList()``. Each node it returns is marked running and
     given to ``app.startStorage()``, then the whole list is passed to
     ``app.broadcastNodesInformation()``. ``conn`` is always answered with
     an Ack describing the outcome.
     """
     logging.debug('Add nodes %s', ', '.join(map(uuid_str, uuid_list)))
     app = self.app
     # take all pending nodes
     candidates = (node for node in app.nm.getStorageList()
                   if node.isPending() and node.getUUID() in uuid_list)
     added = list(app.pt.addNodeList(candidates))
     if not added:
         logging.warning('No node added')
         conn.answer(Errors.Ack('No node added'))
         return
     for node in added:
         node.setRunning()
         app.startStorage(node)
     app.broadcastNodesInformation(added)
     added_names = ', '.join(uuid_str(node.getUUID()) for node in added)
     conn.answer(Errors.Ack('Nodes added: %s' % added_names))
Exemple #12
0
 def sendPartitionTable(self, conn, min_offset, max_offset, uuid):
     """Answer ``conn`` with the rows of a range of the partition table.

     The range is ``[min_offset, max_offset)``; a ``max_offset`` of 0
     means "up to the number of partitions". An IndexError while reading
     rows is reported as a ProtocolError packet. ``uuid`` is not used
     here.
     """
     pt = self.pt
     if max_offset == 0:
         max_offset = pt.getPartitions()
     try:
         # Rows are collected eagerly so that IndexError is raised here.
         rows = [pt.getRow(offset)
                 for offset in xrange(min_offset, max_offset)]
     except IndexError:
         conn.send(Errors.ProtocolError('invalid partition table offset'))
         return
     conn.answer(
         Packets.AnswerPartitionList(pt.getID(), pt.getReplicas(), rows))
Exemple #13
0
 def askObjectHistory(self, conn, oid, first, last):
     """Answer ``conn`` with the history of ``oid`` between two offsets.

     Raises ProtocolError unless ``first < last``, and DelayEvent while
     ``tm.loadLocked(oid)`` is true. A None history is reported as an
     OidNotFound error.
     """
     if first >= last:
         raise ProtocolError('invalid offsets')
     app = self.app
     if app.tm.loadLocked(oid):
         raise DelayEvent
     # The data manager takes a start offset and a length.
     history = app.dm.getObjectHistory(oid, first, last - first)
     if history is None:
         conn.answer(Errors.OidNotFound(dump(oid)))
         return
     conn.answer(Packets.AnswerObjectHistory(oid, history))
Exemple #14
0
 def repair(self, conn, uuid_list, *args):
     """Send a NotifyRepair packet to every listed storage node.

     All UUIDs are validated first: ProtocolError is raised, and nothing
     is sent, if one of them is unknown or is not an identified storage
     node. ``args`` are forwarded to ``Packets.NotifyRepair``. ``conn``
     receives an empty Ack once all nodes have been notified.
     """
     lookup = self.app.nm.getByUUID
     targets = []
     for uuid in uuid_list:
         target = lookup(uuid)
         if (target is None or not target.isStorage()
                 or not target.isIdentified()):
             raise ProtocolError("invalid storage node %s" % uuid_str(uuid))
         targets.append(target)
     packet = Packets.NotifyRepair(*args)
     for target in targets:
         target.send(packet)
     conn.answer(Errors.Ack(''))
Exemple #15
0
 def tweakPartitionTable(self, conn, uuid_list):
     """Tweak the partition table and broadcast the resulting changes.

     Only accepted while the cluster state is RUNNING, STARTING_BACKUP or
     BACKINGUP; any other state raises ProtocolError. ``app.pt.tweak()``
     receives the storage nodes that are listed in ``uuid_list`` or that
     are not running; its result is broadcast before ``conn`` receives an
     empty Ack.
     """
     app = self.app
     cluster_state = app.getClusterState()
     # XXX: Would it be safe to allow more states ?
     accepted_states = (ClusterStates.RUNNING,
                        ClusterStates.STARTING_BACKUP,
                        ClusterStates.BACKINGUP)
     if cluster_state not in accepted_states:
         raise ProtocolError('Can not tweak partition table in %s state'
                             % cluster_state)
     selected = []
     for storage in app.nm.getStorageList():
         if storage.getUUID() in uuid_list or not storage.isRunning():
             selected.append(storage)
     app.broadcastPartitionChanges(app.pt.tweak(selected))
     conn.answer(Errors.Ack(''))
Exemple #16
0
 def askObjectUndoSerial(self, conn, ttid, ltid, undone_tid, oid_list):
     """Answer ``conn`` with the undo information of each listed oid.

     The answer maps every oid to the
     ``(current_serial, undo_serial, is_current)`` triple returned by
     ``dm.findUndoTID()``. The first oid whose current serial is None
     makes the whole request fail with an OidNotFound error.
     """
     app = self.app
     find_undo_tid = app.dm.findUndoTID
     get_pending_object = app.tm.getObjectFromTransaction
     undo_info = {}
     for oid in oid_list:
         pending = get_pending_object(ttid, oid)
         current_serial, undo_serial, is_current = find_undo_tid(
             oid, ttid, ltid, undone_tid, pending)
         if current_serial is None:
             conn.answer(Errors.OidNotFound(dump(oid)))
             return
         undo_info[oid] = (current_serial, undo_serial, is_current)
     conn.answer(Packets.AnswerObjectUndoSerial(undo_info))
Exemple #17
0
 def askObjectUndoSerial(self, conn, ttid, ltid, undone_tid, oid_list):
     """Answer ``conn`` with the undo information of each listed oid.

     An oid for which ``dm.findUndoTID()`` returns a falsy result is left
     out of the answer. A result whose first item is falsy makes the
     whole request fail with an OidNotFound error.
     """
     app = self.app
     find_undo_tid = app.dm.findUndoTID
     get_pending_object = app.tm.getObjectFromTransaction
     undo_info = {}
     for oid in oid_list:
         pending = get_pending_object(ttid, oid)
         found = find_undo_tid(oid, ttid, ltid, undone_tid, pending)
         if not found:
             continue
         if not found[0]:
             conn.answer(Errors.OidNotFound(dump(oid)))
             return
         undo_info[oid] = found
     conn.answer(Packets.AnswerObjectUndoSerial(undo_info))
Exemple #18
0
 def checkReplicas(self, conn, partition_dict, min_tid, max_tid):
     """Notify storage nodes to check partitions, then Ack ``conn``.

     ``partition_dict`` maps a partition offset to a source UUID, or to a
     false value when no source is given. For each checked partition, one
     node gets a CheckPartition packet carrying the offset, a
     ``(name, address)`` source pair and the ``[min_tid, max_tid]`` range.

     A false ``max_tid`` is replaced by ``pt.getCheckTid(partition_dict)``
     when ``app.backup_tid`` is true, else by ``app.getLastTransaction()``.
     If ``min_tid > max_tid``, a warning is logged and nothing is sent.
     ``conn`` receives an empty Ack in every case.
     """
     app = self.app
     pt = app.pt
     backingup = bool(app.backup_tid)
     if not max_tid:
         max_tid = pt.getCheckTid(partition_dict) if backingup else \
             app.getLastTransaction()
     if min_tid > max_tid:
         logging.warning("nothing to check: min_tid=%s > max_tid=%s",
                         dump(min_tid), dump(max_tid))
     else:
         getByUUID = app.nm.getByUUID
         # Nodes already chosen for a previous partition.
         node_set = set()
         for offset, source in partition_dict.iteritems():
             # XXX: For the moment, code checking replicas is unable to fix
             #      corrupted partitions (when a good cell is known)
             #      so only check readable ones.
             #      (see also Checker._nextPartition of storage)
             cell_list = pt.getCellList(offset, True)
             #cell_list = [cell for cell in pt.getCellList(offset)
             #                  if not cell.isOutOfDate()]
             # Skip the partition unless there are at least 2 things to
             # compare: the bool term counts for 1 when backing up without
             # an explicit source.
             if len(cell_list) + (backingup and not source) <= 1:
                 continue
             # Pick the first cell whose node was already chosen; when
             # there is none, the node of the last cell is used and
             # recorded in node_set.
             for cell in cell_list:
                 node = cell.getNode()
                 if node in node_set:
                     break
             else:
                 node_set.add(node)
             # From here on, `source` is rebound from a UUID to the
             # (name, address) pair put in the packet.
             if source:
                 source = '', getByUUID(source).getAddress()
             else:
                 readable = [
                     cell for cell in cell_list if cell.isReadable()
                 ]
                 # NOTE(review): if getCellList(offset, True) already
                 # returns only readable cells, this first branch looks
                 # unreachable - confirm.
                 if 1 == len(readable) < len(cell_list):
                     source = '', readable[0].getAddress()
                 elif backingup:
                     # Random readable cell of the backup application's
                     # partition table.
                     source = app.backup_app.name, random.choice(
                         app.backup_app.pt.getCellList(
                             offset, readable=True)).getAddress()
                 else:
                     source = '', None
             node.getConnection().notify(
                 Packets.CheckPartition(offset, source, min_tid, max_tid))
     conn.answer(Errors.Ack(''))
Exemple #19
0
 def sendPartitionTable(self, conn, min_offset, max_offset, uuid):
     """Answer ``conn`` with the cells of a range of the partition table.

     The range is ``[min_offset, max_offset)``; a ``max_offset`` of 0
     means "up to the number of partitions". Each row is
     ``(offset, [(cell uuid, cell state), ...])``, restricted to the
     cells of ``uuid`` when it is not None. An IndexError is reported as
     a ProtocolError packet.
     """
     pt = self.pt
     # we have a pt
     pt.log()
     if max_offset == 0:
         max_offset = pt.getPartitions()
     rows = []
     try:
         for offset in xrange(min_offset, max_offset):
             cells = []
             try:
                 for cell in pt.getCellList(offset):
                     if uuid is None or cell.getUUID() == uuid:
                         cells.append((cell.getUUID(), cell.getState()))
             except TypeError:
                 # Best effort: keep whatever was collected for this row.
                 pass
             rows.append((offset, cells))
     except IndexError:
         conn.send(Errors.ProtocolError('invalid partition table offset'))
         return
     conn.answer(Packets.AnswerPartitionList(pt.getID(), rows))
Exemple #20
0
 def askFinishTransaction(self, conn, ttid, oid_list, checked_list):
     """Prepare the transaction and ask the involved nodes to lock it.

     ``tm.prepare()`` returns a ``(tid, node_list)`` pair. With a true
     tid, every node of the list is asked for lock information. Otherwise
     ``conn`` gets an IncompleteTransaction error and the transaction is
     aborted.
     """
     app = self.app
     tid, node_list = app.tm.prepare(
         app, ttid, oid_list, checked_list, conn.getPeerId())
     if not tid:
         conn.answer(Errors.IncompleteTransaction())
         # It's simpler to abort automatically rather than asking the
         # client to send a notification on tpc_abort, since that would
         # keep the transaction longer in the list of transactions.
         # This should happen so rarely that we don't try to minimize the
         # number of abort notifications by looking at the modified
         # partitions.
         self.abortTransaction(conn, ttid, app.getStorageReadySet())
         return
     lock_request = Packets.AskLockInformation(ttid, tid)
     for node in node_list:
         node.ask(lock_request)
Exemple #21
0
            if node.isConnected():
                # notify itself so it can shutdown
                node.send(Packets.NotifyNodeInformation(
                    monotonic_time(), [node.asTuple()]))
                # close to avoid handle the closure as a connection lost
                node.getConnection().abort()
            if keep:
                cell_list = app.pt.outdate()
            elif cell_list:
                message = 'node permanently removed'
            app.broadcastPartitionChanges(cell_list)
        else:
            node.setState(state)

        # /!\ send the node information *after* the partition table change
        conn.answer(Errors.Ack(message))
        if state_changed:
            # notify node explicitly because broadcastNodesInformation()
            # ignores non-running nodes
            assert not node.isRunning()
            if node.isConnected():
                node.send(Packets.NotifyNodeInformation(
                    monotonic_time(), [node.asTuple()]))
            app.broadcastNodesInformation([node])

    def addPendingNodes(self, conn, uuid_list):
        uuids = ', '.join(map(uuid_str, uuid_list))
        logging.debug('Add nodes %s', uuids)
        app = self.app
        state = app.getClusterState()
        # XXX: Would it be safe to allow more states ?
Exemple #22
0
 def truncate(self, conn, tid):
     """Acknowledge a truncate request, then raise StoppedOperation(tid).

     Raises ProtocolError instead, without answering, unless the cluster
     state is RUNNING.
     """
     if self.app.cluster_state != ClusterStates.RUNNING:
         raise ProtocolError('Can not truncate in this state')
     # The Ack must be sent before the exception interrupts the handler.
     ack = Errors.Ack('')
     conn.answer(ack)
     raise StoppedOperation(tid)
Exemple #23
0
 def _readOnly(self, conn, *args, **kw):
     """Answer ``conn`` with a ReadOnlyAccess error.

     The extra positional and keyword arguments are accepted and ignored.
     """
     denial = Errors.ReadOnlyAccess(
         'read-only access because cluster is in backuping mode')
     conn.answer(denial)
Exemple #24
0
 def truncate(self, conn, tid):
     """Acknowledge a truncate request, then raise StoppedOperation(tid)."""
     # The Ack must be sent before the exception interrupts the handler.
     ack = Errors.Ack('')
     conn.answer(ack)
     raise StoppedOperation(tid)
Exemple #25
0
 def setNumReplicas(self, conn, num_replicas):
     """Broadcast the new number of replicas, then Ack ``conn``.

     The broadcast carries an empty list of partition changes.
     """
     app = self.app
     app.broadcastPartitionChanges((), num_replicas)
     ack = Errors.Ack('')
     conn.answer(ack)
Exemple #26
0
class ClientOperationHandler(EventHandler):
    """Handlers for transaction and object requests, and connection loss.

    Every ``ask*`` method answers on the connection it was given, unless
    the request is queued to be retried later.
    """

    def askTransactionInformation(self, conn, tid):
        """Answer with what ``dm.getTransaction(tid)`` returned.

        A None result is reported as a TidNotFound error.
        """
        t = self.app.dm.getTransaction(tid)
        if t is None:
            p = Errors.TidNotFound('%s does not exist' % dump(tid))
        else:
            p = Packets.AnswerTransactionInformation(tid, t[1], t[2], t[3],
                    t[4], t[0])
        conn.answer(p)

    def askObject(self, conn, oid, serial, tid):
        """Answer with an object's data, or requeue while it is locked.

        When ``dm.getObject()`` does not return a 6-item record, the
        answer is OidDoesNotExist for None and OidNotFound otherwise.
        """
        app = self.app
        if app.tm.loadLocked(oid):
            # Delay the response.
            app.queueEvent(self.askObject, conn, (oid, serial, tid))
            return
        o = app.dm.getObject(oid, serial, tid)
        try:
            serial, next_serial, compression, checksum, data, data_serial = o
        except TypeError:
            # `o` could not be unpacked (None or another non-iterable).
            p = (Errors.OidDoesNotExist if o is None else
                 Errors.OidNotFound)(dump(oid))
        else:
            if checksum is None:
                # No checksum: answer with ZERO_HASH and empty data.
                checksum = ZERO_HASH
                data = ''
            p = Packets.AnswerObject(oid, serial, next_serial,
                compression, checksum, data, data_serial)
        conn.answer(p)

    def connectionLost(self, conn, new_state):
        """Remove the node of a lost connection from the node manager.

        Nothing is removed when ``app.listening_conn`` is false.
        ``new_state`` is not used.
        """
        uuid = conn.getUUID()
        node = self.app.nm.getByUUID(uuid)
        if self.app.listening_conn: # if running
            assert node is not None, conn
            self.app.nm.remove(node)

    def abortTransaction(self, conn, ttid):
        """Abort transaction ``ttid``; nothing is answered."""
        self.app.tm.abort(ttid)

    def askStoreTransaction(self, conn, ttid, *txn_info):
        """Register ``ttid`` for the peer, vote with ``txn_info``, answer."""
        self.app.tm.register(conn.getUUID(), ttid)
        self.app.tm.vote(ttid, txn_info)
        conn.answer(Packets.AnswerStoreTransaction())

    def askVoteTransaction(self, conn, ttid):
        """Vote for transaction ``ttid`` and answer."""
        self.app.tm.vote(ttid)
        conn.answer(Packets.AnswerVoteTransaction())

    def _askStoreObject(self, conn, oid, serial, compression, checksum, data,
            data_serial, ttid, unlock, request_time):
        """Store an object, answering on conflict and requeueing on delay.

        ``request_time`` is only forwarded when the request is requeued.
        """
        try:
            self.app.tm.storeObject(ttid, serial, oid, compression,
                    checksum, data, data_serial, unlock)
        except ConflictError as err:
            # resolvable or not
            conn.answer(Packets.AnswerStoreObject(1, oid, err.getTID()))
        except DelayedError:
            # locked by a previous transaction, retry later
            # If we are unlocking, we want queueEvent to raise
            # AlreadyPendingError, to avoid making client wait for an
            # unneeded response.
            try:
                self.app.queueEvent(self._askStoreObject, conn, (oid, serial,
                    compression, checksum, data, data_serial, ttid,
                    unlock, request_time), key=(oid, ttid),
                    raise_on_duplicate=unlock)
            except AlreadyPendingError:
                conn.answer(Errors.AlreadyPending(dump(oid)))
Exemple #27
0
        self._askCheckCurrentSerial(conn, ttid, serial, oid, time.time())

    def _askCheckCurrentSerial(self, conn, ttid, serial, oid, request_time):
        """Check ``serial`` of ``oid`` for ``ttid`` and answer ``conn``.

        Outcome of ``tm.checkCurrentSerial()``:
        - ConflictError: answer with conflict flag 1 and the error's TID;
        - DelayedError: requeue this call under the key ``(oid, ttid)``,
          or answer AlreadyPending if such an event is already queued;
        - NotRegisteredError: log, then answer with flag 0;
        - success: answer with flag 0, logging the elapsed time since
          ``request_time`` when it exceeds SLOW_STORE.
        """
        try:
            self.app.tm.checkCurrentSerial(ttid, serial, oid)
        except ConflictError as err:
            # resolvable or not
            conn.answer(Packets.AnswerCheckCurrentSerial(1, oid,
                err.getTID()))
        except DelayedError:
            # locked by a previous transaction, retry later
            try:
                self.app.queueEvent(self._askCheckCurrentSerial, conn, (ttid,
                    serial, oid, request_time), key=(oid, ttid))
            except AlreadyPendingError:
                conn.answer(Errors.AlreadyPending(dump(oid)))
        except NotRegisteredError:
            # transaction was aborted, cancel this event
            logging.info('Forget serial check of %s:%s by %s delayed by %s',
                dump(oid), dump(serial), dump(ttid),
                dump(self.app.tm.getLockingTID(oid)))
            # send an answer as the client side is waiting for it
            conn.answer(Packets.AnswerCheckCurrentSerial(0, oid, serial))
        else:
            # SLOW_STORE set to None disables the slow-request log.
            if SLOW_STORE is not None:
                duration = time.time() - request_time
                if duration > SLOW_STORE:
                    logging.info('CheckCurrentSerial delay: %.02fs', duration)
            conn.answer(Packets.AnswerCheckCurrentSerial(0, oid, serial))