def setClusterState(self, conn, state):
    """Handle a request to switch the cluster to `state`.

    Raises ProtocolError when the current cluster state is not listed in
    CLUSTER_STATE_WORKFLOW[state], or when `state` has no workflow entry
    and is not ClusterStates.STOPPING.

    VERIFYING only sets `app._startup_allowed`; it needs at least one
    identified storage node and no storage connection still pending.
    STARTING_BACKUP is refused while `app.tm.hasPending()` or
    `app.nm.getClientList(True)` is truthy.

    On success an Ack is answered on `conn`, then StateChangedException is
    raised if the requested state differs from the current one.
    """
    app = self.app

    # check request
    try:
        allowed_sources = CLUSTER_STATE_WORKFLOW[state]
    except KeyError:
        # STOPPING is the only state accepted without a workflow entry.
        if state != ClusterStates.STOPPING:
            raise ProtocolError('Invalid state requested')
    else:
        if app.cluster_state not in allowed_sources:
            raise ProtocolError('Can not switch to this state')

    # change state
    if state == ClusterStates.VERIFYING:
        identified_storages = app.nm.getStorageList(only_identified=True)
        if not identified_storages:
            raise ProtocolError(
                'Cannot exit recovery without any storage node')
        for storage in identified_storages:
            assert storage.isPending(), storage
            if storage.getConnection().isPending():
                # XXX: It's wrong to use ProtocolError here. We must reply
                #      less aggressively because the admin has no way to
                #      know that there's still pending activity.
                raise ProtocolError(
                    'Cannot exit recovery now: node %r is entering cluster'
                    % (storage, ))
        app._startup_allowed = True
        # Continue with the current state so that the comparison below
        # does not raise StateChangedException.
        state = app.cluster_state
    elif state == ClusterStates.STARTING_BACKUP:
        if app.tm.hasPending() or app.nm.getClientList(True):
            raise ProtocolError(
                "Can not switch to %s state with pending transactions"
                " or connected clients" % state)

    conn.answer(Errors.Ack('Cluster state changed'))
    if state != app.cluster_state:
        raise StateChangedException(state)
def setNodeState(self, conn, uuid, state):
    """Apply an administrative request to put node `uuid` in `state`.

    Raises ProtocolError when the node is unknown, when `state` is not
    listed in NODE_STATE_WORKFLOW for the node's type, when `uuid` is this
    application's own uuid, or when dropping the node from the partition
    table raises PartitionTableException.

    NOTE(review): `state_changed` and `message` are computed but never
    consumed in the visible code, and nothing is answered on `conn` here;
    confirm against the complete file whether the method continues.
    """
    logging.info("set node state for %s: %s", uuid_str(uuid), state)
    app = self.app
    node = app.nm.getByUUID(uuid)
    if node is None:
        raise ProtocolError('unknown node')
    if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
        raise ProtocolError('can not switch node to this state')
    if uuid == app.uuid:
        raise ProtocolError('can not kill primary master node')
    state_changed = state != node.getState()
    message = ('state changed' if state_changed else
               'node already in %s state' % state)
    if node.isStorage():
        # For DOWN, dropNodeList() is asked to keep the node (second
        # argument); any other accepted state passes keep=False.
        keep = state == NodeStates.DOWN
        try:
            cell_list = app.pt.dropNodeList([node], keep)
        except PartitionTableException, e:
            raise ProtocolError(str(e))
        node.setState(state)
        if node.isConnected():
            # notify itself so it can shutdown
            node.send(Packets.NotifyNodeInformation(
                monotonic_time(), [node.asTuple()]))
            # close to avoid handle the closure as a connection lost
            node.getConnection().abort()
        if keep:
            # The change list to broadcast comes from outdate() instead of
            # the result of dropNodeList().
            cell_list = app.pt.outdate()
        elif cell_list:
            message = 'node permanently removed'
        app.broadcastPartitionChanges(cell_list)
def requestIdentification(self, conn, node_type, uuid, address, name, *_):
    """Attach an identifying admin connection to the backup entry `name`.

    Raises ProtocolError when the peer is not an admin node, when `name`
    is not a key of `app.backup_dict`, or when that entry already has a
    connection. Otherwise the connection is stored on the entry, switched
    to `app.backup_handler` and answered with AcceptIdentification.
    """
    if node_type != NodeTypes.ADMIN:
        raise ProtocolError("reject non-admin node")

    app = self.app
    try:
        backup_entry = app.backup_dict[name]
    except KeyError:
        raise ProtocolError("unknown backup cluster %r" % name)

    already_bound = backup_entry.conn is not None
    if already_bound:
        raise ProtocolError("already connected")

    backup_entry.conn = conn
    conn.setHandler(app.backup_handler)
    conn.answer(
        Packets.AcceptIdentification(NodeTypes.ADMIN, None, None))
def notifyReplicationDone(self, conn, offset, tid):
    """Handle a notification that the peer has replicated partition `offset`.

    When `app.backup_tid` is set, the notification is delegated to
    `app.backup_app.notifyReplicationDone()` and the method returns early
    if that yields no cell change. Otherwise the peer's cell is marked
    up-to-date in the partition table.

    Raises ProtocolError when the partition table refuses the update
    (PartitionTableException) or when the update changes no cell.
    """
    app = self.app
    node = app.nm.getByUUID(conn.getUUID())
    if app.backup_tid:
        cell_list = app.backup_app.notifyReplicationDone(node, offset, tid)
        if not cell_list:
            return
    else:
        # Keep the try body minimal: only setUpToDate() is meant to be
        # guarded against PartitionTableException.
        try:
            cell_list = app.pt.setUpToDate(node, offset)
        except PartitionTableException as e:
            raise ProtocolError(str(e))
        if not cell_list:
            raise ProtocolError('Non-outdated partition')
def requestIdentification(self, conn, node_type, uuid, address, name): self.checkClusterName(name) # reject any incoming connections if not ready if not self.app.ready: raise NotReadyError app = self.app if uuid is None: if node_type != NodeTypes.STORAGE: raise ProtocolError('reject anonymous non-storage node') handler = StorageOperationHandler(self.app) conn.setHandler(handler) else: if uuid == app.uuid: raise ProtocolError("uuid conflict or loopback connection") node = app.nm.getByUUID(uuid) # If this node is broken, reject it. if node is not None and node.isBroken(): raise BrokenNodeDisallowedError # choose the handler according to the node type if node_type == NodeTypes.CLIENT: handler = ClientOperationHandler if node is None: node = app.nm.createClient(uuid=uuid) elif node.isConnected(): # This can happen if we haven't processed yet a notification # from the master, telling us the existing node is not # running anymore. If we accept the new client, we won't # know what to do with this late notification. raise NotReadyError('uuid conflict: retry later') node.setRunning() elif node_type == NodeTypes.STORAGE: if node is None: logging.error('reject an unknown storage node %s', uuid_str(uuid)) raise NotReadyError handler = StorageOperationHandler else: raise ProtocolError('reject non-client-or-storage node') # apply the handler and set up the connection handler = handler(self.app) conn.setHandler(handler) node.setConnection(conn, app.uuid < uuid) # accept the identification and trigger an event conn.answer( Packets.AcceptIdentification(NodeTypes.STORAGE, uuid and app.uuid, app.pt.getPartitions(), app.pt.getReplicas(), uuid, app.master_node.getAddress(), ())) handler.connectionCompleted(conn)
def requestIdentification(self, conn, node_type, uuid, address, name,
                                id_timestamp, extra):
    """Refuse an identification request by pointing to the primary master.

    Raises ProtocolError when `address` equals `app.server`, and
    PrimaryElected (after closing `conn`) when the primary is not yet
    identified and a master peer supplies a truthy `id_timestamp`.
    Otherwise NotPrimaryMaster is sent and the connection is aborted.
    """
    app = self.app
    self.checkClusterName(name)
    if address == app.server:
        raise ProtocolError('address conflict')
    primary = app.primary_master.getAddress()
    if primary == address:
        # The peer itself is at the primary's address: do not advertise it.
        primary = None
    elif not app.primary_master.isIdentified():
        if node_type == NodeTypes.MASTER:
            node = app.nm.createMaster(address=address)
            if id_timestamp:
                conn.close()
                raise PrimaryElected(node)
        primary = None
    # For some cases, we rely on the fact that the remote will not retry
    # immediately (see SocketConnector.CONNECT_LIMIT).
    known_master_list = [node.getAddress()
        for node in app.nm.getMasterList()]
    # The first field is None when no primary is advertised, otherwise the
    # index of the primary's address in known_master_list.
    conn.send(Packets.NotPrimaryMaster(
        primary and known_master_list.index(primary),
        known_master_list))
    conn.abort()
def rebase(self, conn, ttid, locking_tid):
    """Rebase transaction `ttid` onto `locking_tid`.

    Returns an empty tuple when the transaction's locking_tid is MAX_TID
    after `_rebase`, otherwise the set of oids that could not be relocked
    here. Raises ProtocolError if the transaction has already voted.
    """
    self.register(conn, ttid)
    transaction = self._transaction_dict[ttid]
    if transaction.voted:
        raise ProtocolError("TXN %s already voted" % dump(ttid))
    # First, get a set copy of serial_dict before _rebase locks oids.
    lock_set = set(transaction.serial_dict)
    # Second argument is False when locking_tid is currently MAX_TID,
    # else ttid.
    self._rebase(transaction, transaction.locking_tid != MAX_TID and ttid,
                 locking_tid)
    if transaction.locking_tid == MAX_TID:
        # New deadlock. There's no point rebasing objects now.
        return ()
    # We return all oids that can't be relocked trivially
    # (the client will use RebaseObject for these oids).
    lock_set -= transaction.lockless # see comment in _rebase
    # oids present in _store_lock_dict are reported without a lock attempt.
    recheck_set = lock_set.intersection(self._store_lock_dict)
    lock_set -= recheck_set
    for oid in lock_set:
        try:
            serial = transaction.serial_dict[oid]
        except KeyError:
            # An oid was already being rebased and delayed,
            # and it got a conflict during the above call to _rebase.
            continue
        try:
            self.lockObject(ttid, serial, oid)
        except ConflictError:
            recheck_set.add(oid)
    return recheck_set
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
    """Load a full partition table and persist it in the database.

    The table is loaded into `app.pt`; ProtocolError is raised if it is
    not filled afterwards. Every cell is recorded as an
    (offset, uuid, state) tuple, walking offsets from highest to lowest.
    Partitions with no cell for `app.uuid` are dropped from the database,
    unless `app.disable_drop_partitions` is set, in which case this is
    only logged. The table is then written with reset=True and committed.
    """
    app = self.app
    pt = app.pt
    pt.load(ptid, num_replicas, row_list, app.nm)
    if not pt.filled():
        raise ProtocolError('Partial partition table received')
    # Install the partition table into the database for persistence.
    my_uuid = app.uuid
    num_partitions = pt.getPartitions()
    cell_list = []
    # Collect assigned offsets in a set rather than removing entries from
    # a list that is being iterated.
    assigned = set()
    for offset in reversed(range(num_partitions)):
        for cell in pt.getCellList(offset):
            cell_uuid = cell.getUUID()
            cell_list.append((offset, cell_uuid, cell.getState()))
            if cell_uuid == my_uuid:
                assigned.add(offset)
    # Ascending list of offsets that have no cell on this node.
    unassigned = [offset for offset in range(num_partitions)
                  if offset not in assigned]
    # delete objects database
    dm = app.dm
    if unassigned:
        if app.disable_drop_partitions:
            logging.info('partitions %r are discarded but actual deletion'
                         ' of data is disabled', unassigned)
        else:
            logging.debug('drop data for partitions %r', unassigned)
            dm.dropPartitions(unassigned)
    dm.changePartitionTable(ptid, num_replicas, cell_list, reset=True)
    dm.commit()
def addPendingNodes(self, conn, uuid_list):
    """Add the pending storage nodes listed in `uuid_list` to the cluster.

    Only allowed in the RUNNING, STARTING_BACKUP and BACKINGUP cluster
    states; ProtocolError is raised otherwise. Nodes returned by
    `app.pt.addNodeList()` are set running, sent StartOperation and
    broadcast. An Ack describing the outcome is always answered.
    """
    logging.debug('Add nodes %s', ', '.join(map(uuid_str, uuid_list)))
    app = self.app
    cluster_state = app.getClusterState()
    # XXX: Would it be safe to allow more states ?
    allowed_states = (
        ClusterStates.RUNNING,
        ClusterStates.STARTING_BACKUP,
        ClusterStates.BACKINGUP,
    )
    if cluster_state not in allowed_states:
        raise ProtocolError(
            'Can not add nodes in %s state' % cluster_state)

    # take all pending nodes
    candidates = (node for node in app.nm.getStorageList()
                  if node.isPending() and node.getUUID() in uuid_list)
    added = list(app.pt.addNodeList(candidates))

    if not added:
        logging.warning('No node added')
        conn.answer(Errors.Ack('No node added'))
        return

    start_operation = Packets.StartOperation(bool(app.backup_tid))
    for node in added:
        node.setRunning()
        node.notify(start_operation)
    app.broadcastNodesInformation(added)
    added_names = ', '.join(uuid_str(node.getUUID()) for node in added)
    conn.answer(Errors.Ack('Nodes added: %s' % added_names))
def sendPartitionTable(self, conn, ptid, row_list):
    """Load a full partition table and persist it in the database.

    Raises ProtocolError if the table is not filled after loading.
    Partitions with no cell for `app.uuid` are dropped from the database
    unless `app.disable_drop_partitions` is set. The table is then
    written with reset=True and committed.
    """
    app = self.app
    table = app.pt
    table.load(ptid, row_list, app.nm)
    if not table.filled():
        raise ProtocolError('Partial partition table received')

    # Install the partition table into the database for persistence.
    rows = []
    all_offsets = xrange(table.getPartitions())
    to_drop = set(all_offsets)
    for offset in all_offsets:
        for cell in table.getCellList(offset):
            rows.append((offset, cell.getUUID(), cell.getState()))
            if cell.getUUID() == app.uuid:
                to_drop.remove(offset)

    # delete objects database
    database = app.dm
    if to_drop and app.disable_drop_partitions:
        logging.info("don't drop data for partitions %r", to_drop)
    elif to_drop:
        logging.debug('drop data for partitions %r', to_drop)
        database.dropPartitions(to_drop)
    database.changePartitionTable(ptid, rows, reset=True)
    database.commit()
def __getitem__(self, ttid):
    """Return the transaction object registered under `ttid`.

    Raises ProtocolError if `ttid` is not a key of `_ttid_dict`.
    """
    try:
        txn = self._ttid_dict[ttid]
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))
    return txn
def abort(self, ttid, uuid):
    """Abort transaction `ttid` on behalf of node `uuid`.

    Raises ProtocolError if the transaction is already prepared;
    otherwise the transaction is deleted from this manager.
    """
    logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
    txn = self[ttid]
    if txn.isPrepared():
        raise ProtocolError(
            "commit already requested for ttid %s" % dump(ttid))
    del self[ttid]
def abort(self, ttid, uuid):
    """Abort transaction `ttid` and return its `_notification_set`.

    Raises ProtocolError if the transaction is already prepared.
    """
    logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
    aborted = self[ttid]
    prepared = aborted.isPrepared()
    if prepared:
        raise ProtocolError(
            "commit already requested for ttid %s" % dump(ttid))
    del self[ttid]
    # Read the attribute only after the transaction has been removed.
    return aborted._notification_set
def notifyPartitionChanges(self, conn, ptid, cell_list):
    """Apply an incremental partition table update.

    Similar to sendPartitionTable, except that `cell_list` only carries
    the changes since the previous table. `ptid` must be exactly one more
    than the current table id, else ProtocolError is raised. The change
    is applied in memory and in the database, forwarded to the replicator
    when the application is operational, then committed.
    """
    app = self.app
    table = app.pt
    expected_ptid = 1 + table.getID()
    if ptid != expected_ptid:
        raise ProtocolError('wrong partition table id')

    table.update(ptid, cell_list, app.nm)
    database = app.dm
    database.changePartitionTable(ptid, cell_list)
    if app.operational:
        app.replicator.notifyPartitionChanges(cell_list)
    database.commit()
def tweakPartitionTable(self, conn, uuid_list):
    """Tweak the partition table and broadcast the resulting changes.

    Only allowed in the RUNNING, STARTING_BACKUP and BACKINGUP cluster
    states; ProtocolError is raised otherwise. Each uuid of `uuid_list`
    is resolved with `app.nm.getByUUID` and the resulting list is passed
    to `app.pt.tweak()`. An empty Ack is answered.
    """
    app = self.app
    cluster_state = app.getClusterState()
    # XXX: Would it be safe to allow more states ?
    allowed_states = (
        ClusterStates.RUNNING,
        ClusterStates.STARTING_BACKUP,
        ClusterStates.BACKINGUP,
    )
    if cluster_state not in allowed_states:
        raise ProtocolError(
            'Can not tweak partition table in %s state' % cluster_state)

    resolve = app.nm.getByUUID
    node_list = [resolve(node_uuid) for node_uuid in uuid_list]
    changes = app.pt.tweak(node_list)
    app.broadcastPartitionChanges(changes)
    conn.answer(Errors.Ack(''))
def repair(self, conn, uuid_list, *args):
    """Send NotifyRepair(*args) to every storage node in `uuid_list`.

    All uuids are validated before anything is sent: ProtocolError is
    raised for a uuid that is unknown, not a storage node, or not
    identified. An empty Ack is answered once all nodes are notified.
    """
    lookup = self.app.nm.getByUUID
    targets = []
    for uuid in uuid_list:
        node = lookup(uuid)
        if node is None or not node.isStorage() or not node.isIdentified():
            raise ProtocolError("invalid storage node %s" % uuid_str(uuid))
        targets.append(node)

    packet = Packets.NotifyRepair(*args)
    for node in targets:
        node.send(packet)
    conn.answer(Errors.Ack(''))
def askObjectHistory(self, conn, oid, first, last):
    """Answer the history of object `oid` for the slice [first, last).

    Raises ProtocolError when `first >= last`, and DelayEvent while
    `app.tm.loadLocked(oid)` is truthy. Answers OidNotFound when the
    database returns None for the history, AnswerObjectHistory otherwise.
    """
    if first >= last:
        raise ProtocolError('invalid offsets')
    app = self.app
    if app.tm.loadLocked(oid):
        raise DelayEvent

    length = last - first
    history = app.dm.getObjectHistory(oid, first, length)
    answer = (Errors.OidNotFound(dump(oid)) if history is None
              else Packets.AnswerObjectHistory(oid, history))
    conn.answer(answer)
def _askTIDs(self, first, last, partition):
    """Return the TID list for the slice [first, last).

    With INVALID_PARTITION, the partitions assigned to this node are
    queried; otherwise only `partition` is. Raises ProtocolError when
    `first >= last`.
    """
    # This method is complicated, because I must return TIDs only
    # about usable partitions assigned to me.
    if first >= last:
        raise ProtocolError('invalid offsets')

    app = self.app
    partitions = (app.pt.getAssignedPartitionList(app.uuid)
                  if partition == INVALID_PARTITION else [partition])
    length = last - first
    return app.dm.getTIDList(first, length, partitions)
def tweakPartitionTable(self, conn, uuid_list):
    """Tweak the partition table and broadcast the resulting changes.

    Only allowed in the RUNNING, STARTING_BACKUP and BACKINGUP cluster
    states; ProtocolError is raised otherwise. The list passed to
    `app.pt.tweak()` holds every storage node whose uuid is in
    `uuid_list` or that is not running. An empty Ack is answered.
    """
    app = self.app
    cluster_state = app.getClusterState()
    # XXX: Would it be safe to allow more states ?
    allowed_states = (
        ClusterStates.RUNNING,
        ClusterStates.STARTING_BACKUP,
        ClusterStates.BACKINGUP,
    )
    if cluster_state not in allowed_states:
        raise ProtocolError(
            'Can not tweak partition table in %s state' % cluster_state)

    selected = []
    for node in app.nm.getStorageList():
        if node.getUUID() in uuid_list or not node.isRunning():
            selected.append(node)
    app.broadcastPartitionChanges(app.pt.tweak(selected))
    conn.answer(Errors.Ack(''))
def unlock(self, ttid):
    """Unlock transaction `ttid`.

    Raises ProtocolError if `ttid` is unknown. The transaction is
    unlocked in the database, a timeout one second ahead is registered
    with the result of `dm.deferCommit()`, and the transaction is then
    aborted with even_if_locked=True.
    """
    try:
        transaction = self._transaction_dict[ttid]
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))
    tid = transaction.tid
    logging.debug('Unlock TXN %s (ttid=%s)', dump(tid), dump(ttid))

    app = self._app
    database = app.dm
    database.unlockTransaction(tid, ttid)
    deadline = time() + 1
    app.em.setTimeout(deadline, database.deferCommit())
    self.abort(ttid, even_if_locked=True)
def requestIdentification(self, conn, node_type, uuid, address, name,
                          id_timestamp):
    """Identify a node connecting to this storage node.

    Raises NotReadyError while `app.operational` is false, and
    ProtocolError for an anonymous non-storage peer, for a uuid equal to
    ours, or for a node type other than client/storage.

    Clients get the read-only handler when `app.dm.getBackupTID()` is
    truthy. On success the connection gets its handler,
    AcceptIdentification is answered and
    `handler.connectionCompleted(conn)` is called.
    """
    self.checkClusterName(name)
    app = self.app
    # reject any incoming connections if not ready
    if not app.operational:
        raise NotReadyError

    if uuid is None:
        # Anonymous peer: no node object is bound to the connection.
        if node_type != NodeTypes.STORAGE:
            raise ProtocolError('reject anonymous non-storage node')
        handler = StorageOperationHandler(self.app)
        conn.setHandler(handler)
    else:
        if uuid == app.uuid:
            raise ProtocolError("uuid conflict or loopback connection")
        node = app.nm.getByUUID(uuid, id_timestamp)
        # choose the handler according to the node type
        if node_type == NodeTypes.CLIENT:
            handler_class = (ClientReadOnlyOperationHandler
                             if app.dm.getBackupTID()
                             else ClientOperationHandler)
            assert not node.isConnected(), node
            assert node.isRunning(), node
        elif node_type == NodeTypes.STORAGE:
            handler_class = StorageOperationHandler
        else:
            raise ProtocolError('reject non-client-or-storage node')
        # apply the handler and set up the connection
        handler = handler_class(self.app)
        conn.setHandler(handler)
        node.setConnection(conn, app.uuid < uuid)

    # accept the identification and trigger an event
    # (`uuid and app.uuid` sends our uuid only to a peer that gave one)
    acceptance = Packets.AcceptIdentification(
        NodeTypes.STORAGE,
        uuid and app.uuid,
        app.pt.getPartitions(),
        app.pt.getReplicas(),
        uuid)
    conn.answer(acceptance)
    handler.connectionCompleted(conn)
def askStoreObject(self, conn, oid, serial, compression, checksum, data,
                   data_serial, ttid, unlock):
    """Validate a store request and hand it to `_askStoreObject`.

    The transaction is registered for the peer before the payload is
    checked. When neither data nor a non-zero checksum is supplied, both
    are passed on as None. The current time is appended as last argument.

    Raises ProtocolError when `compression` is greater than 1, when the
    checksum does not match `data`, or when `data_serial` is given
    together with inline data.
    """
    if 1 < compression:
        raise ProtocolError('invalid compression value')
    # register the transaction
    self.app.tm.register(conn.getUUID(), ttid)
    if data or checksum != ZERO_HASH:
        # These values come from the peer: reject them explicitly instead
        # of relying on `assert`, which is removed under `python -O`.
        if makeChecksum(data) != checksum:
            raise ProtocolError('invalid checksum')
        if data_serial is not None:
            raise ProtocolError('unexpected data_serial with data')
    else:
        checksum = data = None
    self._askStoreObject(conn, oid, serial, compression, checksum, data,
                         data_serial, ttid, unlock, time.time())
def answerPartitionTable(self, conn, ptid, row_list):
    """Load the partition table answered for the targeted `ptid`.

    Answers whose `ptid` differs from `self.target_ptid` are ignored.
    Otherwise the table is loaded (ProtocolError on IndexError), admins
    are notified, `ask_pt` is reset to an empty tuple, and the backup and
    truncate TIDs recorded for the answering node are copied onto the
    application.
    """
    # If this is not from a target node, ignore it.
    if not (ptid == self.target_ptid):
        return

    app = self.app
    try:
        new_nodes = app.pt.load(ptid, row_list, app.nm)
    except IndexError:
        raise ProtocolError('Invalid offset')

    node_info = Packets.NotifyNodeInformation(new_nodes)
    table_packet = Packets.SendPartitionTable(ptid, row_list)
    self._notifyAdmins(node_info, table_packet)
    self.ask_pt = ()

    peer_uuid = conn.getUUID()
    app.backup_tid = self.backup_tid_dict[peer_uuid]
    app.truncate_tid = self.truncate_dict[peer_uuid]
def _acceptIdentification(self, node, uuid, num_partitions, num_replicas, your_uuid, primary, known_master_list): app = self.app # Register new master nodes. found = False conn_address = node.getAddress() for node_address, node_uuid in known_master_list: if node_address == conn_address: assert uuid == node_uuid, (dump(uuid), dump(node_uuid)) found = True n = app.nm.getByAddress(node_address) if n is None: n = app.nm.createMaster(address=node_address) if node_uuid is not None and n.getUUID() != node_uuid: n.setUUID(node_uuid) assert found, (node, dump(uuid), known_master_list) conn = node.getConnection() if primary is not None: primary_node = app.nm.getByAddress(primary) if primary_node is None: # I don't know such a node. Probably this information # is old. So ignore it. logging.warning('Unknown primary master: %s. Ignoring.', primary) return else: if app.trying_master_node is not primary_node: app.trying_master_node = None conn.close() app.primary_master_node = primary_node else: if app.primary_master_node is not None: # The primary master node is not a primary master node # any longer. app.primary_master_node = None app.trying_master_node = None conn.close() return # the master must give an UUID if your_uuid is None: raise ProtocolError('No UUID supplied') app.uuid = your_uuid logging.info('Got an UUID: %s', dump(app.uuid)) # Always create partition table app.pt = PartitionTable(num_partitions, num_replicas)
def askFetchObjects(self, conn, partition, length, min_tid, max_tid,
        min_oid, object_dict):
    """Schedule a task that sends the objects of `partition` to the peer.

    Raises ProtocolError if `max_tid` is locked according to
    `app.tm.isLockedTid`. The sending itself is done by the `push`
    generator, which is registered with `app.newTask()`.
    """
    app = self.app
    if app.tm.isLockedTid(max_tid):
        raise ProtocolError("transactions must be fetched before objects")
    msg_id = conn.getPeerId()
    # Only a weak proxy to the connection is kept from here on;
    # weakref.ReferenceError is handled in push().
    conn = weakref.proxy(conn)
    dm = app.dm
    # One more entry than `length` is requested: when it comes back, it
    # is popped and used as the next (tid, oid) position.
    object_list = dm.getReplicationObjectList(min_tid, max_tid, length + 1,
        partition, min_oid)
    if length < len(object_list):
        next_tid, next_oid = object_list.pop()
    else:
        next_tid = next_oid = None
    def push():
        try:
            pack_tid = None # TODO
            for serial, oid in object_list:
                oid_set = object_dict.get(serial)
                if oid_set:
                    if type(oid_set) is tuple:
                        # Convert to a set so that oids can be removed.
                        object_dict[serial] = oid_set = set(oid_set)
                    if oid in oid_set:
                        # Listed in object_dict: remove the entry and do
                        # not send this object.
                        oid_set.remove(oid)
                        if not oid_set:
                            del object_dict[serial]
                        continue
                object = dm.fetchObject(oid, serial)
                if not object:
                    conn.send(Errors.ReplicationError(
                        "partition %u dropped or truncated"
                        % partition), msg_id)
                    return
                if not object[2]: # creation undone
                    object = object[0], 0, ZERO_HASH, '', object[4]
                # Same as in askFetchTransactions.
                conn.send(Packets.AddObject(oid, *object), msg_id)
                yield conn.buffering
            # What is left in object_dict is sent back with the answer.
            conn.send(Packets.AnswerFetchObjects(
                pack_tid, next_tid, next_oid, object_dict), msg_id)
            yield
        except (weakref.ReferenceError, ConnectionClosed):
            # The connection is gone: stop silently.
            pass
    app.newTask(push())
def lock(self, ttid, tid):
    """Lock transaction `ttid` under its final `tid`.

    Raises ProtocolError if `ttid` is unknown. The transaction must not
    have a tid yet and `ttid <= tid` must hold (both asserted). Every key
    of the transaction's store_dict is mapped to `ttid` in
    `_load_lock_dict`, and the database transaction is locked when
    `voted == 2`.
    """
    logging.debug('Lock TXN %s (ttid=%s)', dump(tid), dump(ttid))
    try:
        txn = self._transaction_dict[ttid]
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))

    assert txn.tid is None, dump(txn.tid)
    assert ttid <= tid, (ttid, tid)
    txn.tid = tid

    load_locks = dict.fromkeys(txn.store_dict, ttid)
    self._load_lock_dict.update(load_locks)
    if txn.voted == 2:
        self._app.dm.lockTransaction(tid, ttid)
def askCheckSerialRange(self, conn, *args):
    """Schedule a task answering a serial-range check to the peer.

    `args` is forwarded unchanged to `app.dm.checkSerialRange`; its
    fourth item is the max tid. Raises ProtocolError if that tid is
    locked according to `app.tm.isLockedTid`.
    """
    app = self.app
    max_tid = args[3]
    if app.tm.isLockedTid(max_tid):
        raise ProtocolError("transactions must be checked before objects")

    msg_id = conn.getPeerId()
    # Only a weak proxy to the connection is kept from here on.
    conn = weakref.proxy(conn)

    def answer_task():
        result = app.dm.checkSerialRange(*args)
        try:
            conn.send(Packets.AnswerCheckSerialRange(*result), msg_id)
        except (weakref.ReferenceError, ConnectionClosed):
            # The connection is gone: nothing to answer.
            pass
        yield

    app.newTask(answer_task())
def lock(self, ttid, tid):
    """Lock transaction `ttid` under its final `tid`.

    Raises ProtocolError if `ttid` is unknown. The transaction is marked
    locked, each of its oids is mapped to `ttid` in `_load_lock_dict`,
    the database transaction is locked when `has_trans` is set, and
    `tid` is recorded on the transaction.
    """
    logging.debug('Lock TXN %s (ttid=%s)', dump(tid), dump(ttid))
    try:
        txn = self._transaction_dict[ttid]
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))

    # remember that the transaction has been locked
    txn.lock()
    load_locks = dict.fromkeys(txn.getOIDList(), ttid)
    self._load_lock_dict.update(load_locks)

    # commit transaction and remember its definitive TID
    if txn.has_trans:
        self._app.dm.lockTransaction(tid, ttid)
    txn.setTID(tid)
def _setupNode(self, conn, node_type, uuid, address, node):
    """Register an identifying node and return the uuid assigned to it.

    Picks the handler (and, for storage nodes, the state) from the node
    type, obtains a uuid from `app.getNewUUID()`, creates the node if
    `node` is None, binds it to `conn` and broadcasts its information.

    Raises ProtocolError if `node` is already running, NotReadyError when
    the cluster state does not allow this node type yet, and
    NotImplementedError for an unsupported node type.
    """
    app = self.app
    if node:
        if node.isRunning():
            # cloned/evil/buggy node connecting to us
            raise ProtocolError('already connected')
        else:
            assert not node.isConnected()
        node.setAddress(address)
        node.setRunning()

    # Default state; only the storage branch may replace it.
    state = NodeStates.RUNNING
    if node_type == NodeTypes.CLIENT:
        if app.cluster_state != ClusterStates.RUNNING:
            raise NotReadyError
        handler = app.client_service_handler
        human_readable_node_type = ' client '
    elif node_type == NodeTypes.STORAGE:
        if app.cluster_state == ClusterStates.STOPPING_BACKUP:
            raise NotReadyError
        # Fall back to the application itself when no manager is set.
        manager = app._current_manager
        if manager is None:
            manager = app
        state, handler = manager.identifyStorageNode(
            uuid is not None and node is not None)
        human_readable_node_type = ' storage (%s) ' % (state, )
    elif node_type == NodeTypes.MASTER:
        handler = app.secondary_master_handler
        human_readable_node_type = ' master '
    elif node_type == NodeTypes.ADMIN:
        handler = app.administration_handler
        # Leading 'n' so that the log line below reads "Accept an admin".
        human_readable_node_type = 'n admin '
    else:
        raise NotImplementedError(node_type)

    uuid = app.getNewUUID(uuid, address, node_type)
    logging.info('Accept a' + human_readable_node_type + uuid_str(uuid))
    if node is None:
        node = app.nm.createFromNodeType(node_type,
            uuid=uuid, address=address)
    node.setUUID(uuid)
    node.setState(state)
    node.setConnection(conn)
    conn.setHandler(handler)
    app.broadcastNodesInformation([node], node)
    return uuid
def notifyReplicationDone(self, conn, offset, tid):
    """Handle a notification that the peer has replicated partition `offset`.

    When `app.backup_tid` is set, the notification is delegated to
    `app.backup_app.notifyReplicationDone()`. Otherwise the peer's cell
    is marked up-to-date in the partition table; ProtocolError is raised
    if the table raises PartitionTableException, and a notification that
    changes no cell is logged and ignored.
    """
    app = self.app
    peer_uuid = conn.getUUID()
    peer = app.nm.getByUUID(peer_uuid)

    if app.backup_tid:
        cell_list = app.backup_app.notifyReplicationDone(peer, offset, tid)
        if not cell_list:
            return
        return

    try:
        cell_list = app.pt.setUpToDate(peer, offset)
    except PartitionTableException as e:
        raise ProtocolError(str(e))
    if not cell_list:
        logging.info(
            "ignored late notification that"
            " %s has replicated partition %s up to %s",
            uuid_str(peer_uuid), offset, dump(tid))
        return