Example #1
 def notifyReplicationDone(self, node, offset, tid):
     app = self.app
     cell = app.pt.getCell(offset, node.getUUID())
     tid_list = self.tid_list[offset]
     if tid_list:  # may be empty if the cell is out-of-date
         # or if we're not fully initialized
         if tid < tid_list[0]:
             cell.replicating = tid
         else:
             try:
                 tid = add64(tid_list[bisect(tid_list, tid)], -1)
             except IndexError:
                 last_tid = app.getLastTransaction()
                 if tid < last_tid:
                     tid = last_tid
                     node.send(Packets.Replicate(tid, '', {offset: None}))
     logging.debug("partition %u: updating backup_tid of %r to %s", offset,
                   cell, dump(tid))
     cell.backup_tid = tid
     # TODO: Provide invalidation feedback about new txns to read-only
     #       clients connected to backup cluster. Not only here but also
     #       hooked to in-progress feedback from fetchObjects (storage).
     # Forget tids we won't need anymore.
     cell_list = app.pt.getCellList(offset, readable=True)
     del tid_list[:bisect(tid_list, min(x.backup_tid for x in cell_list))]
     primary_node = self.primary_partition_dict.get(offset)
     primary = primary_node is node
     result = None if primary else app.pt.setUpToDate(node, offset)
     assert cell.isReadable()
     if result:  # was out-of-date
         if primary_node is not None:
             max_tid, = [
                 x.backup_tid for x in cell_list
                 if x.getNode() is primary_node
             ]
             if tid < max_tid:
                 cell.replicating = max_tid
                 logging.debug(
                     "ask %s to replicate partition %u up to %s from %s",
                     uuid_str(node.getUUID()), offset, dump(max_tid),
                     uuid_str(primary_node.getUUID()))
                 node.send(
                     Packets.Replicate(max_tid, '',
                                       {offset: primary_node.getAddress()}))
     else:
         if app.getClusterState() == ClusterStates.BACKINGUP:
             self.triggerBackup(node)
         if primary:
             # Notify secondary storages that they can replicate from
             # primary ones, even if they are already replicating.
             p = Packets.Replicate(tid, '', {offset: node.getAddress()})
             for cell in cell_list:
                 if max(cell.backup_tid, cell.replicating) < tid:
                     cell.replicating = tid
                     logging.debug(
                         "ask %s to replicate partition %u up to %s from %s",
                         uuid_str(cell.getUUID()), offset, dump(tid),
                         uuid_str(node.getUUID()))
                     cell.getNode().send(p)
     return result
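The bisect/add64 step above rounds the reported tid down to just below the
next transaction tracked for the partition. Here is a minimal standalone
sketch of that rounding, assuming NEO-style 8-byte big-endian packed tids
(p64, u64 and add64 are modeled on helpers from neo.lib.util):

 import struct
 from bisect import bisect

 def p64(n):         # pack an integer into an 8-byte big-endian tid
     return struct.pack('>Q', n)

 def u64(tid):       # unpack a packed tid back into an integer
     return struct.unpack('>Q', tid)[0]

 def add64(tid, n):  # add a small integer to a packed tid
     return p64(u64(tid) + n)

 tid_list = [p64(10), p64(20), p64(30)]  # invalidated tids tracked so far
 tid = p64(25)                           # replication reported up to here
 # Round down to just before the next tracked transaction: 30 - 1 = 29.
 tid = add64(tid_list[bisect(tid_list, tid)], -1)
 assert u64(tid) == 29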
Example #2
 def notifyReplicationDone(self, node, offset, tid):
     app = self.app
     cell = app.pt.getCell(offset, node.getUUID())
     tid_list = self.tid_list[offset]
     if tid_list: # may be empty if the cell is out-of-date
                  # or if we're not fully initialized
         if tid < tid_list[0]:
             cell.replicating = tid
         else:
             try:
                 tid = add64(tid_list[bisect(tid_list, tid)], -1)
             except IndexError:
                 last_tid = app.getLastTransaction()
                 if tid < last_tid:
                     tid = last_tid
                     node.notify(Packets.Replicate(tid, '', {offset: None}))
     logging.debug("partition %u: updating backup_tid of %r to %s",
                   offset, cell, dump(tid))
     cell.backup_tid = tid
     # Forget tids we won't need anymore.
     cell_list = app.pt.getCellList(offset, readable=True)
     del tid_list[:bisect(tid_list, min(x.backup_tid for x in cell_list))]
     primary_node = self.primary_partition_dict.get(offset)
     primary = primary_node is node
     result = None if primary else app.pt.setUpToDate(node, offset)
     assert cell.isReadable()
     if result: # was out-of-date
         if primary_node is not None:
             max_tid, = [x.backup_tid for x in cell_list
                                      if x.getNode() is primary_node]
             if tid < max_tid:
                 cell.replicating = max_tid
                 logging.debug(
                     "ask %s to replicate partition %u up to %s from %s",
                     uuid_str(node.getUUID()), offset, dump(max_tid),
                     uuid_str(primary_node.getUUID()))
                 node.notify(Packets.Replicate(max_tid, '',
                     {offset: primary_node.getAddress()}))
     else:
         if app.getClusterState() == ClusterStates.BACKINGUP:
             self.triggerBackup(node)
         if primary:
             # Notify secondary storages that they can replicate from
             # primary ones, even if they are already replicating.
             p = Packets.Replicate(tid, '', {offset: node.getAddress()})
             for cell in cell_list:
                 if max(cell.backup_tid, cell.replicating) < tid:
                     cell.replicating = tid
                     logging.debug(
                         "ask %s to replicate partition %u up to %s from %s",
                         uuid_str(cell.getUUID()), offset,
                         dump(tid), uuid_str(node.getUUID()))
                     cell.getNode().notify(p)
     return result
Example #3
 def deadlock(self, storage_id, ttid, locking_tid):
     try:
         txn = self._ttid_dict[ttid]
     except KeyError:
         return
     if txn.locking_tid <= locking_tid:
         client = txn.getNode()
         txn.locking_tid = locking_tid = self._nextTID()
         logging.info('Deadlock avoidance triggered by %s for %s:'
             ' new locking tid for TXN %s is %s', uuid_str(storage_id),
             uuid_str(client.getUUID()), dump(ttid), dump(locking_tid))
         client.send(Packets.NotifyDeadlock(ttid, locking_tid))
Example #4
 def _triggerSecondary(self, node, offset, tid, cell_list):
     # Notify secondary storages that they can replicate from
     # primary ones, even if they are already replicating.
     p = Packets.Replicate(tid, '', {offset: node.getAddress()})
     for cell in cell_list:
         if max(cell.backup_tid, cell.replicating) < tid:
             cell.replicating = tid
             logging.debug(
                 "ask %s to replicate partition %u up to %s from %s",
                 uuid_str(cell.getUUID()), offset, dump(tid),
                 uuid_str(node.getUUID()))
             cell.getNode().send(p)
Example #5
    def setNodeState(self, conn, uuid, state):
        logging.info("set node state for %s: %s", uuid_str(uuid), state)
        app = self.app
        node = app.nm.getByUUID(uuid)
        if node is None:
            raise ProtocolError("unknown node")
        if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
            raise ProtocolError("can not switch node to this state")
        if uuid == app.uuid:
            raise ProtocolError("can not kill primary master node")

        state_changed = state != node.getState()
        message = "state changed" if state_changed else "node already in %s state" % state
        if node.isStorage():
            keep = state == NodeStates.UNKNOWN
            try:
                cell_list = app.pt.dropNodeList([node], keep)
            except PartitionTableException, e:
                raise ProtocolError(str(e))
            node.setState(state)
            if node.isConnected():
                # notify itself so it can shut down
                node.notify(Packets.NotifyNodeInformation([node.asTuple()]))
                # close to avoid handling the closure as a connection loss
                node.getConnection().abort()
            if keep:
                cell_list = app.pt.outdate()
            elif cell_list:
                message = "node permanently removed"
            app.broadcastPartitionChanges(cell_list)
Example #6
 def corrupt(offset):
     s0, s1, s2 = (storage_dict[cell.getUUID()]
         for cell in cluster.master.pt.getCellList(offset, True))
     logging.info('corrupt partition %u of %s',
                  offset, uuid_str(s1.uuid))
     s1.dm.deleteObject(p64(np+offset), p64(corrupt_tid))
     return s0.uuid
Example #7
 def connectionFailed(self, conn):
     addr = conn.getAddress()
     node = self.app.nm.getByAddress(addr)
     assert node is not None, (uuid_str(self.app.uuid), addr)
     # node may still be in unknown state
     self.app.negotiating_master_node_set.discard(addr)
     super(ClientElectionHandler, self).connectionFailed(conn)
Example #8
 def addPendingNodes(self, conn, uuid_list):
     uuids = ', '.join(map(uuid_str, uuid_list))
     logging.debug('Add nodes %s', uuids)
     app = self.app
     state = app.getClusterState()
     # XXX: Would it be safe to allow more states ?
     if state not in (ClusterStates.RUNNING, ClusterStates.STARTING_BACKUP,
                      ClusterStates.BACKINGUP):
         raise ProtocolError('Can not add nodes in %s state' % state)
     # take all pending nodes
     node_list = list(
         app.pt.addNodeList(
             node for node in app.nm.getStorageList()
             if node.isPending() and node.getUUID() in uuid_list))
     if node_list:
         p = Packets.StartOperation(bool(app.backup_tid))
         for node in node_list:
             node.setRunning()
             node.notify(p)
         app.broadcastNodesInformation(node_list)
         conn.answer(
             Errors.Ack('Nodes added: %s' %
                        ', '.join(uuid_str(x.getUUID())
                                  for x in node_list)))
     else:
         logging.warning('No node added')
         conn.answer(Errors.Ack('No node added'))
Example #9
    def setNodeState(self, conn, uuid, state):
        logging.info("set node state for %s: %s", uuid_str(uuid), state)
        app = self.app
        node = app.nm.getByUUID(uuid)
        if node is None:
            raise ProtocolError('unknown node')
        if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
            raise ProtocolError('can not switch node to this state')
        if uuid == app.uuid:
            raise ProtocolError('can not kill primary master node')

        state_changed = state != node.getState()
        message = ('state changed' if state_changed else
                   'node already in %s state' % state)
        if node.isStorage():
            keep = state == NodeStates.DOWN
            try:
                cell_list = app.pt.dropNodeList([node], keep)
            except PartitionTableException, e:
                raise ProtocolError(str(e))
            node.setState(state)
            if node.isConnected():
                # notify itself so it can shut down
                node.send(Packets.NotifyNodeInformation(
                    monotonic_time(), [node.asTuple()]))
                # close to avoid handling the closure as a connection loss
                node.getConnection().abort()
            if keep:
                cell_list = app.pt.outdate()
            elif cell_list:
                message = 'node permanently removed'
            app.broadcastPartitionChanges(cell_list)
Example #10
 def triggerBackup(self, node):
     tid_list = self.tid_list
     tid = self.app.getLastTransaction()
     replicate_list = []
     for offset, cell in self.app.pt.iterNodeCell(node):
         max_tid = tid_list[offset]
         if max_tid and self.primary_partition_dict[offset] is node and \
            max(cell.backup_tid, cell.replicating) < max_tid[-1]:
             cell.replicating = tid
             replicate_list.append(offset)
     if not replicate_list:
         return
     getCellList = self.pt.getCellList
     source_dict = {}
     address_set = set()
     for offset in replicate_list:
         cell_list = getCellList(offset, readable=True)
         random.shuffle(cell_list)
         assert cell_list, offset
         for cell in cell_list:
             addr = cell.getAddress()
             if addr in address_set:
                 break
         else:
             address_set.add(addr)
         source_dict[offset] = addr
         logging.debug("ask %s to replicate partition %u up to %s from %r",
                       uuid_str(node.getUUID()), offset, dump(tid), addr)
     node.send(Packets.Replicate(tid, self.name, source_dict))
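The for/else in the loop above is easy to misread. The following standalone
sketch shows the selection rule it implements, with plain strings standing in
for node addresses: reuse an address already picked as a source for another
partition when possible, otherwise register the last (shuffled, hence
arbitrary) candidate as a new source. The candidate list must be non-empty,
which the assert above guarantees.

 def pick_source(candidates, address_set):
     for addr in candidates:
         if addr in address_set:
             break              # reuse an already-selected source
     else:
         address_set.add(addr)  # nothing to reuse: register a new source
     return addr

 used = set()
 assert pick_source(['a', 'b'], used) == 'b' and used == {'b'}
 assert pick_source(['c', 'b'], used) == 'b'  # 'b' reused, 'c' not added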
Example #11
 def connectionLost(self, conn, new_state):
     app = self.app
     node = app.nm.getByUUID(conn.getUUID())
     if node is None:
         return  # for example, when a storage is removed by an admin
     assert node.isStorage(), node
     logging.info('storage node lost')
     if new_state != NodeStates.BROKEN:
         new_state = DISCONNECTED_STATE_DICT.get(node.getType(),
                                                 NodeStates.DOWN)
     assert new_state in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN,
                          NodeStates.BROKEN), new_state
     assert node.getState() not in (NodeStates.TEMPORARILY_DOWN,
                                    NodeStates.DOWN,
                                    NodeStates.BROKEN), (uuid_str(
                                        self.app.uuid), node.whoSetState(),
                                                         new_state)
     was_pending = node.isPending()
     node.setState(new_state)
     if new_state != NodeStates.BROKEN and was_pending:
         # was in pending state, so drop it from the node manager to forget
         # it, and do not set it to the running state when it comes back
         logging.info('drop a pending node from the node manager')
         app.nm.remove(node)
     app.broadcastNodesInformation([node])
     if app.truncate_tid:
         raise StoppedOperation
     app.broadcastPartitionChanges(app.pt.outdate(node))
     if not app.pt.operational():
         raise StoppedOperation
Example #12
 def triggerBackup(self, node):
     tid_list = self.tid_list
     tid = self.app.getLastTransaction()
     replicate_list = []
     for offset, cell in self.app.pt.iterNodeCell(node):
         max_tid = tid_list[offset]
         if max_tid and self.primary_partition_dict[offset] is node and \
            max(cell.backup_tid, cell.replicating) < max_tid[-1]:
             cell.replicating = tid
             replicate_list.append(offset)
     if not replicate_list:
         return
     getCellList = self.pt.getCellList
     source_dict = {}
     address_set = set()
     for offset in replicate_list:
         cell_list = getCellList(offset, readable=True)
         random.shuffle(cell_list)
         assert cell_list, offset
         for cell in cell_list:
             addr = cell.getAddress()
             if addr in address_set:
                 break
         else:
             address_set.add(addr)
         source_dict[offset] = addr
         logging.debug("ask %s to replicate partition %u up to %s from %r",
             uuid_str(node.getUUID()), offset, dump(tid), addr)
     node.getConnection().notify(Packets.Replicate(
         tid, self.name, source_dict))
Example #13
 def connectionLost(self, conn, new_state):
     app = self.app
     node = app.nm.getByUUID(conn.getUUID())
     if node is None:
         return  # for example, when a storage is removed by an admin
     assert node.isStorage(), node
     logging.info("storage node lost")
     if new_state != NodeStates.BROKEN:
         new_state = DISCONNECTED_STATE_DICT.get(node.getType(), NodeStates.DOWN)
     assert new_state in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN, NodeStates.BROKEN), new_state
     assert node.getState() not in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN, NodeStates.BROKEN), (
         uuid_str(self.app.uuid),
         node.whoSetState(),
         new_state,
     )
     was_pending = node.isPending()
     node.setState(new_state)
     if new_state != NodeStates.BROKEN and was_pending:
         # was in pending state, so drop it from the node manager to forget
         # it, and do not set it to the running state when it comes back
         logging.info("drop a pending node from the node manager")
         app.nm.remove(node)
     app.broadcastNodesInformation([node])
     if app.truncate_tid:
         raise StoppedOperation
     app.broadcastPartitionChanges(app.pt.outdate(node))
     if not app.pt.operational():
         raise StoppedOperation
Example #14
 def register(self, conn, ttid):
     """
         Register a transaction; it may already be registered
     """
     if ttid not in self._transaction_dict:
         uuid = conn.getUUID()
         logging.debug('Register TXN %s for %s', dump(ttid), uuid_str(uuid))
         self._transaction_dict[ttid] = Transaction(uuid, ttid)
Example #15
 def formatNodeList(self, node_list, _sort_key=itemgetter(2, 0, 1)):
     if not node_list:
         return 'Empty list!'
     node_list.sort(key=_sort_key)
     return '\n'.join(
         '%s - %s - %s - %s' %
         (node_type, uuid_str(uuid), address and '%s:%s' % address, state)
         for node_type, address, uuid, state in node_list)
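For illustration, feeding formatNodeList hypothetical rows of
(node_type, address, uuid, state), with uuid_str stubbed by str, renders as
follows:

 from operator import itemgetter

 uuid_str = str  # stand-in for neo.lib.protocol.uuid_str

 node_list = [
     ('STORAGE', ('127.0.0.1', 10020), 'S1', 'RUNNING'),
     ('MASTER', ('127.0.0.1', 10000), 'M1', 'RUNNING'),
     ('ADMIN', None, 'A1', 'RUNNING'),
 ]
 node_list.sort(key=itemgetter(2, 0, 1))  # sort by uuid, type, address
 print('\n'.join(
     '%s - %s - %s - %s' %
     (node_type, uuid_str(uuid), address and '%s:%s' % address, state)
     for node_type, address, uuid, state in node_list))
 # ADMIN - A1 - None - RUNNING
 # MASTER - M1 - 127.0.0.1:10000 - RUNNING
 # STORAGE - S1 - 127.0.0.1:10020 - RUNNING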
Example #16
 def abortFor(self, uuid):
     """
         Abort any non-locked transaction of a node
     """
     logging.debug('Abort for %s', uuid_str(uuid))
     # abort any non-locked transaction of this node
     for ttid, transaction in self._transaction_dict.items():
         if transaction.uuid == uuid:
             self.abort(ttid)
Example #17
 def __repr__(self):
     return "<%s(ttid=%r, tid=%r, uuid=%r, locked=%r, age=%.2fs) at 0x%x>" \
      % (self.__class__.__name__,
         dump(self._ttid),
         dump(self._tid),
         uuid_str(self._uuid),
         self.isLocked(),
         time() - self._birth,
         id(self))
Example #18
 def abortFor(self, uuid):
     """
         Abort any non-locked transaction of a node
     """
     logging.debug('Abort for %s', uuid_str(uuid))
     # abort any non-locked transaction of this node
     for transaction in self._transaction_dict.values():
         if transaction.getUUID() == uuid:
             self.abort(transaction.getTTID())
Example #19
 def abort(self, ttid, uuid):
     """
         Abort a transaction
     """
     logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
     if self[ttid].isPrepared():
         raise ProtocolError("commit already requested for ttid %s"
                             % dump(ttid))
     del self[ttid]
Example #20
 def lock(self, ttid, uuid):
     """
         Record that a node has locked the transaction.
         If the transaction is completely locked, call the function given
         at instantiation time.
     """
     logging.debug('Lock TXN %s for %s', dump(ttid), uuid_str(uuid))
     if self[ttid].lock(uuid) and self._queue[0] == ttid:
         # all storages are locked, so unlock the commit queue
         self._unlockPending()
Example #21
 def register(self, uuid, ttid):
     """
         Register a transaction; it may already be registered
     """
     logging.debug('Register TXN %s for %s', dump(ttid), uuid_str(uuid))
     transaction = self._transaction_dict.get(ttid, None)
     if transaction is None:
         transaction = Transaction(uuid, ttid)
         self._uuid_dict.setdefault(uuid, set()).add(transaction)
         self._transaction_dict[ttid] = transaction
     return transaction
Example #22
    def _acceptIdentification(self, node, uuid, num_partitions, num_replicas,
                              your_uuid, primary, known_master_list):
        app = self.app
        if primary != app.primary_master_node.getAddress():
            raise PrimaryFailure('unexpected primary uuid')

        if your_uuid != app.uuid:
            app.uuid = your_uuid
            logging.info('My UUID: ' + uuid_str(your_uuid))

        node.setUUID(uuid)
Example #23
 def abort(self, ttid, uuid):
     """
         Abort a transaction
     """
     logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
     txn = self[ttid]
     if txn.isPrepared():
         raise ProtocolError("commit already requested for ttid %s"
                             % dump(ttid))
     del self[ttid]
     return txn._notification_set
Example #24
 def repair(self, conn, uuid_list, *args):
     getByUUID = self.app.nm.getByUUID
     node_list = []
     for uuid in uuid_list:
         node = getByUUID(uuid)
         if node is None or not (node.isStorage() and node.isIdentified()):
             raise ProtocolError("invalid storage node %s" % uuid_str(uuid))
         node_list.append(node)
     repair = Packets.NotifyRepair(*args)
     for node in node_list:
         node.send(repair)
     conn.answer(Errors.Ack(''))
Example #25
 def log(cls):
     try:
         if cls.filter_queue:
             logging.info('%s:', cls.__name__)
             for conn, queue in cls.filter_queue.iteritems():
                 app = NEOThreadedTest.getConnectionApp(conn)
                 logging.info('  %s %s:', uuid_str(app.uuid), conn)
                 for p in queue:
                     logging.info('    #0x%04x %s', p.getId(),
                                  p.__class__.__name__)
     except Exception:
         logging.exception('')
Example #26
 def nodeLost(self, node):
     getCellList = self.app.pt.getCellList
     trigger_set = set()
     for offset, primary_node in self.primary_partition_dict.items():
         if primary_node is not node:
             continue
         cell_list = getCellList(offset, readable=True)
         cell = max(cell_list, key=lambda cell: cell.backup_tid)
         tid = cell.backup_tid
         self.primary_partition_dict[offset] = primary_node = cell.getNode()
         p = Packets.Replicate(tid, '', {offset: primary_node.getAddress()})
         for cell in cell_list:
             cell.replicating = tid
             if cell.backup_tid < tid:
                 logging.debug(
                     "ask %s to replicate partition %u up to %s from %s",
                     uuid_str(cell.getUUID()), offset, dump(tid),
                     uuid_str(primary_node.getUUID()))
                 cell.getNode().send(p)
         trigger_set.add(primary_node)
     for node in trigger_set:
         self.triggerBackup(node)
Example #27
 def nodeLost(self, node):
     getCellList = self.app.pt.getCellList
     trigger_set = set()
     for offset, primary_node in self.primary_partition_dict.items():
         if primary_node is not node:
             continue
         cell_list = getCellList(offset, readable=True)
         cell = max(cell_list, key=lambda cell: cell.backup_tid)
         tid = cell.backup_tid
         self.primary_partition_dict[offset] = primary_node = cell.getNode()
         p = Packets.Replicate(tid, '', {offset: primary_node.getAddress()})
         for cell in cell_list:
             cell.replicating = tid
             if cell.backup_tid < tid:
                 logging.debug(
                     "ask %s to replicate partition %u up to %s from %s",
                     uuid_str(cell.getUUID()), offset, dump(tid),
                     uuid_str(primary_node.getUUID()))
                 cell.getNode().getConnection().notify(p)
         trigger_set.add(primary_node)
     for node in trigger_set:
         self.triggerBackup(node)
Example #28
    def _acceptIdentification(self, node, peer_uuid, num_partitions,
                              num_replicas, your_uuid, primary,
                              known_master_list):
        app = self.app

        # Register new master nodes.
        for address, uuid in known_master_list:
            if app.server == address:
                # This is self.
                assert node.getAddress() != primary or uuid == your_uuid, (
                    uuid_str(uuid), uuid_str(your_uuid))
                continue
            n = app.nm.getByAddress(address)
            if n is None:
                n = app.nm.createMaster(address=address)

        if primary is not None:
            # The primary master is defined.
            if app.primary_master_node is not None \
                    and app.primary_master_node.getAddress() != primary:
                # There are multiple primary master nodes. This is
                # dangerous.
                raise ElectionFailure, 'multiple primary master nodes'
            primary_node = app.nm.getByAddress(primary)
            if primary_node is None:
                # I don't know such a node. Probably this information
                # is old. So ignore it.
                logging.warning('received an unknown primary node')
            else:
                # Whatever the situation is, I trust this master.
                app.primary = False
                app.primary_master_node = primary_node
                # Stop waiting for connections other than the primary
                # master's to complete, to exit the election phase ASAP.
                app.negotiating_master_node_set.clear()
                return

        self.elect(None, node.getAddress())
Example #29
 def requestIdentification(self, conn, node_type, uuid, address, name):
     self.checkClusterName(name)
     # reject any incoming connections if not ready
     if not self.app.ready:
         raise NotReadyError
     app = self.app
     if uuid is None:
         if node_type != NodeTypes.STORAGE:
             raise ProtocolError('reject anonymous non-storage node')
         handler = StorageOperationHandler(self.app)
         conn.setHandler(handler)
     else:
         if uuid == app.uuid:
             raise ProtocolError("uuid conflict or loopback connection")
         node = app.nm.getByUUID(uuid)
         # If this node is broken, reject it.
         if node is not None and node.isBroken():
             raise BrokenNodeDisallowedError
         # choose the handler according to the node type
         if node_type == NodeTypes.CLIENT:
             handler = ClientOperationHandler
             if node is None:
                 node = app.nm.createClient(uuid=uuid)
             elif node.isConnected():
                 # This can happen if we haven't yet processed a
                 # notification from the master telling us the existing
                 # node is no longer running. If we accept the new client,
                 # we won't know what to do with this late notification.
                 raise NotReadyError('uuid conflict: retry later')
             node.setRunning()
         elif node_type == NodeTypes.STORAGE:
             if node is None:
                 logging.error('reject an unknown storage node %s',
                               uuid_str(uuid))
                 raise NotReadyError
             handler = StorageOperationHandler
         else:
             raise ProtocolError('reject non-client-or-storage node')
         # apply the handler and set up the connection
         handler = handler(self.app)
         conn.setHandler(handler)
         node.setConnection(conn, app.uuid < uuid)
     # accept the identification and trigger an event
     conn.answer(
         Packets.AcceptIdentification(NodeTypes.STORAGE, uuid and app.uuid,
                                      app.pt.getPartitions(),
                                      app.pt.getReplicas(), uuid,
                                      app.master_node.getAddress(), ()))
     handler.connectionCompleted(conn)
Example #30
 def notifyNodeInformation(self, conn, timestamp, node_list):
     """Store information on nodes, only if this is sent by a primary
     master node."""
     super(BaseMasterHandler,
           self).notifyNodeInformation(conn, timestamp, node_list)
     for node_type, _, uuid, state, _ in node_list:
         if uuid == self.app.uuid:
             # This is me, do what the master tells me
             logging.info("I was told I'm %s", state)
             if state in (NodeStates.UNKNOWN, NodeStates.DOWN):
                 erase = state == NodeStates.UNKNOWN
                 self.app.shutdown(erase=erase)
         elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
             logging.info('Notified of non-running client, abort (%s)',
                          uuid_str(uuid))
             self.app.tm.abortFor(uuid)
Example #31
 def abortFor(self, uuid):
     """
         Abort any non-locked transaction of a node
     """
     logging.debug('Abort for %s', uuid_str(uuid))
     # BUG: Discarding voted transactions must only be a decision of the
     #      master, and for this, we'll need to review how transactions are
     #      aborted. As a workaround, we rely on the fact that lock() will
     #      disconnect from the master in case of LockInformation.
     # abort any non-locked transaction of this node
     for ttid in [x.getTTID() for x in self._uuid_dict.get(uuid, [])]:
         self.abort(ttid)
     # cleanup _uuid_dict if no transaction remains for this node
     transaction_set = self._uuid_dict.get(uuid)
     if transaction_set is not None and not transaction_set:
         del self._uuid_dict[uuid]
Example #32
    def _setupNode(self, conn, node_type, uuid, address, node):
        app = self.app
        if node:
            if node.isRunning():
                # cloned/evil/buggy node connecting to us
                raise ProtocolError('already connected')
            else:
                assert not node.isConnected()
            node.setAddress(address)
            node.setRunning()

        state = NodeStates.RUNNING
        if node_type == NodeTypes.CLIENT:
            if app.cluster_state != ClusterStates.RUNNING:
                raise NotReadyError
            handler = app.client_service_handler
            human_readable_node_type = ' client '
        elif node_type == NodeTypes.STORAGE:
            if app.cluster_state == ClusterStates.STOPPING_BACKUP:
                raise NotReadyError
            manager = app._current_manager
            if manager is None:
                manager = app
            state, handler = manager.identifyStorageNode(
                uuid is not None and node is not None)
            human_readable_node_type = ' storage (%s) ' % (state, )
        elif node_type == NodeTypes.MASTER:
            handler = app.secondary_master_handler
            human_readable_node_type = ' master '
        elif node_type == NodeTypes.ADMIN:
            handler = app.administration_handler
            human_readable_node_type = 'n admin '
        else:
            raise NotImplementedError(node_type)

        uuid = app.getNewUUID(uuid, address, node_type)
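        # 'Accept a' + human_readable_node_type reads as "Accept a client
        # ...", "Accept an admin ...", etc. (hence the 'n admin ' above).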
        logging.info('Accept a' + human_readable_node_type + uuid_str(uuid))
        if node is None:
            node = app.nm.createFromNodeType(node_type,
                uuid=uuid, address=address)
        node.setUUID(uuid)
        node.setState(state)
        node.setConnection(conn)
        conn.setHandler(handler)
        app.broadcastNodesInformation([node], node)
        return uuid
Example #33
 def notifyNodeInformation(self, conn, node_list):
     """Store information on nodes, only if this is sent by a primary
     master node."""
     self.app.nm.update(node_list)
     for node_type, addr, uuid, state in node_list:
         if uuid == self.app.uuid:
             # This is me, do what the master tells me
             logging.info("I was told I'm %s", state)
             if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN,
                     NodeStates.BROKEN, NodeStates.UNKNOWN):
                 erase = state == NodeStates.DOWN
                 self.app.shutdown(erase=erase)
             elif state == NodeStates.HIDDEN:
                 raise StoppedOperation
         elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
             logging.info('Notified of non-running client, abort (%s)',
                     uuid_str(uuid))
             self.app.tm.abortFor(uuid)
Example #34
 def notifyReplicationDone(self, conn, offset, tid):
     app = self.app
     uuid = conn.getUUID()
     node = app.nm.getByUUID(uuid)
     if app.backup_tid:
         cell_list = app.backup_app.notifyReplicationDone(node, offset, tid)
         if not cell_list:
             return
     else:
         try:
             cell_list = self.app.pt.setUpToDate(node, offset)
         except PartitionTableException, e:
             raise ProtocolError(str(e))
         if not cell_list:
             logging.info("ignored late notification that"
                 " %s has replicated partition %s up to %s",
                 uuid_str(uuid), offset, dump(tid))
              return
     self.app.broadcastPartitionChanges(cell_list)
Example #35
 def answerStoreObject(self, conn, conflict, oid):
     txn_context = self.app.getHandlerData()
     if conflict:
         # Conflicts can not be resolved now because 'conn' is locked.
         # We must postpone the resolution (by queuing the conflict in
         # 'conflict_dict') to avoid any deadlock with another thread that
         # also resolves a conflict successfully to the same storage nodes.
         # Warning: if a storage (S1) is much faster than another (S2), then
         # we may entirely process a conflict with S1 (i.e. receive the
         # answer to the store of the resolved object on S1) before we
         # receive the conflict answer from the first store on S2.
         logging.info('%s reports a conflict on %s:%s with %s',
                      uuid_str(conn.getUUID()), dump(oid),
                      dump(txn_context.ttid), dump(conflict))
         # If this conflict is not already resolved, mark it for
         # resolution.
         if txn_context.resolved_dict.get(oid, '') < conflict:
             txn_context.conflict_dict[oid] = conflict
     else:
         txn_context.written(self.app, conn.getUUID(), oid)
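The resolved_dict guard above relies on packed tids being fixed-width byte
strings that sort chronologically, with the '' default sorting below every
tid. A tiny sketch with made-up 8-byte values:

 resolved_dict = {}
 oid = b'\x00' * 8                 # toy packed oid
 conflict = b'\x00' * 7 + b'\x05'  # toy packed tid
 # An oid that was never resolved always compares below any conflict:
 assert resolved_dict.get(oid, b'') < conflict
 resolved_dict[oid] = conflict
 # A late, older conflict for the same oid is now ignored:
 assert not resolved_dict.get(oid, b'') < b'\x00' * 7 + b'\x04'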
Example #36
 def addPendingNodes(self, conn, uuid_list):
     uuids = ', '.join(map(uuid_str, uuid_list))
     logging.debug('Add nodes %s', uuids)
     app = self.app
     # take all pending nodes
     node_list = list(
         app.pt.addNodeList(
             node for node in app.nm.getStorageList()
             if node.isPending() and node.getUUID() in uuid_list))
     if node_list:
         for node in node_list:
             node.setRunning()
             app.startStorage(node)
         app.broadcastNodesInformation(node_list)
         conn.answer(
             Errors.Ack('Nodes added: %s' %
                        ', '.join(uuid_str(x.getUUID())
                                  for x in node_list)))
     else:
         logging.warning('No node added')
         conn.answer(Errors.Ack('No node added'))
Example #37
 def invalidatePartitions(self, tid, partition_set):
     app = self.app
     prev_tid = app.getLastTransaction()
     app.setLastTransaction(tid)
     pt = app.pt
     trigger_set = set()
     untouched_dict = defaultdict(dict)
     for offset in xrange(pt.getPartitions()):
         try:
             last_max_tid = self.tid_list[offset][-1]
         except IndexError:
             last_max_tid = prev_tid
         if offset in partition_set:
             self.tid_list[offset].append(tid)
             node_list = []
             for cell in pt.getCellList(offset, readable=True):
                 node = cell.getNode()
                 assert node.isConnected(), node
                 if cell.backup_tid == prev_tid:
                     # Given 4 TIDs t0, t1, t2, t3: if a cell is only
                     # modified by t0 & t3 and has all data for t0, 4 values
                     # are possible for its 'backup_tid' until it replicates
                     # up to t3: t0, t1, t2 or t3 - 1.
                     # Choosing the smallest one (t0) is easier to implement
                     # but when leaving backup mode, we would always lose
                     # data if the last full transaction does not modify
                     # all partitions. t1 is wrong for the same reason.
                     # So we have chosen the highest one (t3 - 1).
                     # t2 should also work but may be harder to implement.
                     cell.backup_tid = add64(tid, -1)
                     logging.debug(
                         "partition %u: updating backup_tid of %r to %s",
                         offset, cell, dump(cell.backup_tid))
                 else:
                     assert cell.backup_tid < last_max_tid, (
                         cell.backup_tid, last_max_tid, prev_tid, tid)
                 if app.isStorageReady(node.getUUID()):
                     node_list.append(node)
             assert node_list
             trigger_set.update(node_list)
             # Make sure we have a primary storage for this partition.
             if offset not in self.primary_partition_dict:
                 self.primary_partition_dict[offset] = \
                     random.choice(node_list)
         else:
             # Partition not touched, so increase 'backup_tid' of all
             # "up-to-date" replicas, without having to replicate.
             for cell in pt.getCellList(offset, readable=True):
                 if last_max_tid <= cell.backup_tid:
                     cell.backup_tid = tid
                     untouched_dict[cell.getNode()][offset] = None
                 elif last_max_tid <= cell.replicating:
                     # Same for 'replicating' to avoid useless orders.
                     logging.debug("silently update replicating order"
                         " of %s for partition %u, up to %s",
                         uuid_str(cell.getUUID()), offset, dump(tid))
                     cell.replicating = tid
     for node, untouched_dict in untouched_dict.iteritems():
         if app.isStorageReady(node.getUUID()):
             node.notify(Packets.Replicate(tid, '', untouched_dict))
     for node in trigger_set:
         self.triggerBackup(node)
     count = sum(map(len, self.tid_list))
     if self.debug_tid_count < count:
         logging.debug("Maximum number of tracked tids: %u", count)
         self.debug_tid_count = count
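The backup_tid comment in the loop above is dense; here is the same argument
as a tiny numeric sketch, with plain integers standing in for packed tids:

 # Transactions t0 < t1 < t2 < t3; a cell is modified only by t0 & t3 and
 # currently holds all data up to t0. Until it replicates t3, any of
 # these backup_tid values is consistent with its contents:
 t0, t1, t2, t3 = 10, 20, 30, 40
 candidates = [t0, t1, t2, t3 - 1]
 # The code picks the highest (t3 - 1): unlike t0 or t1, it cannot lose
 # data when leaving backup mode if the last full transaction did not
 # touch every partition.
 assert max(candidates) == t3 - 1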
Example #38
    def playPrimaryRole(self):
        logging.info('play the primary role with %r', self.listening_conn)
        self.master_address_dict.clear()
        em = self.em
        packet = Packets.AnnouncePrimary()
        for conn in em.getConnectionList():
            if conn.isListening():
                conn.setHandler(identification.IdentificationHandler(self))
            else:
                conn.notify(packet)
                # The primary master should establish connections to all
                # secondaries, rather than the other way around. This
                # requires a bit more work when a new master joins a cluster
                # but makes it easier to resolve UUID conflicts with minimal
                # cluster impact, and ensures primary master uniqueness
                # (primary masters become noisy, in that they actively try
                # to maintain connections to all other master nodes, so
                # duplicate primaries will eventually get in touch with each
                # other and resolve the situation with a duel).
                # TODO: only abort client connections, don't close server
                # connections as we want to have them in the end. Secondary
                # masters will reconnect nevertheless, but it's dirty.
                # Currently, it's not trivial to preserve connected nodes,
                # because of poor node status tracking during election.
                conn.abort()

        # If I know any storage nodes, make sure that they are not in the
        # running state, because they are not connected at this stage.
        for node in self.nm.getStorageList():
            if node.isRunning():
                node.setTemporarilyDown()

        if self.uuid is None:
            self.uuid = self.getNewUUID(None, self.server, NodeTypes.MASTER)
            logging.info('My UUID: ' + uuid_str(self.uuid))
        else:
            in_conflict = self.nm.getByUUID(self.uuid)
            if in_conflict is not None:
                logging.warning('UUID conflict at election exit with %r',
                    in_conflict)
                in_conflict.setUUID(None)

        # Do not restart automatically if ElectionFailure is raised, in order
        # to avoid a split of the database. For example, with 2 machines with
        # a master and a storage on each one and replicas=1, the secondary
        # master becomes primary in case of network failure between the 2
        # machines but must not start automatically: otherwise, each storage
        # node would diverge.
        self._startup_allowed = False
        try:
            while True:
                self.runManager(RecoveryManager)
                try:
                    self.runManager(VerificationManager)
                    if not self.backup_tid:
                        self.provideService()
                        # self.provideService only returns without raising
                        # when switching to backup mode.
                    if self.backup_app is None:
                        raise RuntimeError("No upstream cluster to backup"
                                           " defined in configuration")
                    truncate = Packets.Truncate(
                        self.backup_app.provideService())
                except StoppedOperation, e:
                    logging.critical('No longer operational')
                    truncate = Packets.Truncate(*e.args) if e.args else None
                    # Automatic restart except if we truncate or retry to.
                    self._startup_allowed = not (self.truncate_tid or truncate)
                node_list = []
                for node in self.nm.getIdentifiedList():
                    if node.isStorage() or node.isClient():
                        conn = node.getConnection()
                        conn.notify(Packets.StopOperation())
                        if node.isClient():
                            conn.abort()
                            continue
                        if truncate:
                            conn.notify(truncate)
                        if node.isRunning():
                            node.setPending()
                            node_list.append(node)
                self.broadcastNodesInformation(node_list)
        except StateChangedException, e:
            assert e.args[0] == ClusterStates.STOPPING
            self.shutdown()
Example #39
 def getPrimary(self, params):
     """
       Get primary master node.
     """
     return uuid_str(self.neoctl.getPrimary())
Example #40
 def askPartitionList(self, conn, min_offset, max_offset, uuid):
     logging.info("ask partition list from %s to %s for %s",
                  min_offset, max_offset, uuid_str(uuid))
     self.app.sendPartitionTable(conn, min_offset, max_offset, uuid)
Example #41
 def formatRowList(self, row_list):
     return '\n'.join('%03d | %s' % (offset,
         ''.join('%s - %s |' % (uuid_str(uuid), state)
         for (uuid, state) in cell_list))
         for (offset, cell_list) in row_list)
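Called with hypothetical rows of (offset, [(uuid, state), ...]) and uuid_str
stubbed by str, formatRowList renders as follows:

 uuid_str = str  # stand-in for neo.lib.protocol.uuid_str

 row_list = [
     (0, [('S1', 'UP_TO_DATE'), ('S2', 'OUT_OF_DATE')]),
     (1, [('S2', 'UP_TO_DATE')]),
 ]
 print('\n'.join('%03d | %s' % (offset,
     ''.join('%s - %s |' % (uuid_str(uuid), state)
     for (uuid, state) in cell_list))
     for (offset, cell_list) in row_list))
 # 000 | S1 - UP_TO_DATE |S2 - OUT_OF_DATE |
 # 001 | S2 - UP_TO_DATE |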