def answerHasLock(self, conn, oid, status):
    store_msg_id = self.app.getHandlerData()['timeout_dict'].pop(oid)
    if status == LockState.GRANTED_TO_OTHER:
        # Stop expecting the timed-out store request.
        self.app.dispatcher.forget(conn, store_msg_id)
        # Object is locked by another transaction, and we have waited until
        # timeout. To avoid a deadlock, abort current transaction (we might
        # be locking objects the other transaction is waiting for).
        raise ConflictError, 'Lock wait timeout for oid %s on %r' % (
            dump(oid), conn)
    # HasLock design required that storage is multi-threaded so that
    # it can answer to AskHasLock while processing store requests.
    # This means that the 2 cases (granted to us or nobody) are legitimate,
    # either because it gave us the lock but is/was slow to store our data,
    # or because the storage took a lot of time processing a previous
    # store (and did not even consider our lock request).
    # XXX: But storage nodes are still mono-threaded, so they should
    #      only answer with GRANTED_TO_OTHER (if they reply!), except
    #      maybe in very rare cases of race condition. Only log for now.
    # This also means that most of the time, if the storage is slow
    # to process some store requests, HasLock will timeout in turn
    # and the connector will be closed.
    # Anyway, it's not clear that HasLock requests are useful.
    # Are store requests potentially long to process? If not,
    # we should simply raise a ConflictError on store timeout.
    logging.info('Store of oid %s delayed (storage overload ?)', dump(oid))
def notifyReplicationDone(self, node, offset, tid):
    app = self.app
    cell = app.pt.getCell(offset, node.getUUID())
    tid_list = self.tid_list[offset]
    if tid_list: # may be empty if the cell is out-of-date
                 # or if we're not fully initialized
        if tid < tid_list[0]:
            cell.replicating = tid
        else:
            try:
                tid = add64(tid_list[bisect(tid_list, tid)], -1)
            except IndexError:
                last_tid = app.getLastTransaction()
                if tid < last_tid:
                    tid = last_tid
                    node.send(Packets.Replicate(tid, '', {offset: None}))
    logging.debug("partition %u: updating backup_tid of %r to %s",
                  offset, cell, dump(tid))
    cell.backup_tid = tid
    # TODO: Provide invalidation feedback about new txns to read-only
    #       clients connected to backup cluster. Not only here but also
    #       hooked to in-progress feedback from fetchObjects (storage).
    # Forget tids we won't need anymore.
    cell_list = app.pt.getCellList(offset, readable=True)
    del tid_list[:bisect(tid_list, min(x.backup_tid for x in cell_list))]
    primary_node = self.primary_partition_dict.get(offset)
    primary = primary_node is node
    result = None if primary else app.pt.setUpToDate(node, offset)
    assert cell.isReadable()
    if result: # was out-of-date
        if primary_node is not None:
            max_tid, = [x.backup_tid for x in cell_list
                        if x.getNode() is primary_node]
            if tid < max_tid:
                cell.replicating = max_tid
                logging.debug(
                    "ask %s to replicate partition %u up to %s from %s",
                    uuid_str(node.getUUID()), offset,
                    dump(max_tid), uuid_str(primary_node.getUUID()))
                node.send(Packets.Replicate(max_tid, '',
                    {offset: primary_node.getAddress()}))
    else:
        if app.getClusterState() == ClusterStates.BACKINGUP:
            self.triggerBackup(node)
        if primary:
            # Notify secondary storages that they can replicate from
            # primary ones, even if they are already replicating.
            p = Packets.Replicate(tid, '', {offset: node.getAddress()})
            for cell in cell_list:
                if max(cell.backup_tid, cell.replicating) < tid:
                    cell.replicating = tid
                    logging.debug(
                        "ask %s to replicate partition %u up to %s from %s",
                        uuid_str(cell.getUUID()), offset,
                        dump(tid), uuid_str(node.getUUID()))
                    cell.getNode().send(p)
    return result
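# Hedged sketch (not part of NEO): the bisect-based clamping that
# notifyReplicationDone above applies to the reported tid, with plain ints
# standing in for NEO's packed 8-byte TIDs and add64(tid, -1). The helper
# name clamp_backup_tid and its arguments are hypothetical.
from bisect import bisect

def clamp_backup_tid(tid, tid_list, last_tid):
    # tid_list: sorted tids of transactions known to the backup cluster.
    if not tid_list or tid < tid_list[0]:
        return tid  # still replicating older data, report as-is
    try:
        # Clamp to just below the next known transaction, so backup_tid
        # never claims to cover a transaction that is not replicated yet.
        return tid_list[bisect(tid_list, tid)] - 1
    except IndexError:
        # Past the last known transaction: the cell is as up-to-date as
        # the last committed transaction.
        return max(tid, last_tid)

assert clamp_backup_tid(15, [10, 20, 30], 35) == 19
assert clamp_backup_tid(30, [10, 20, 30], 35) == 35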
def fetchTransactions(self, min_tid=None):
    offset = self.current_partition
    p = self.partition_dict[offset]
    if min_tid:
        p.next_trans = min_tid
    else:
        try:
            addr, name = self.source_dict[offset]
        except KeyError:
            pass
        else:
            if addr != self.current_node.getAddress():
                return self.abort()
        min_tid = p.next_trans
        self.replicate_tid = self.replicate_dict.pop(offset)
        logging.debug("starting replication of <partition=%u"
            " min_tid=%s max_tid=%s> from %r", offset, dump(min_tid),
            dump(self.replicate_tid), self.current_node)
    max_tid = self.replicate_tid
    tid_list = self.app.dm.getReplicationTIDList(min_tid, max_tid,
        FETCH_COUNT, offset)
    self.current_node.getConnection().ask(Packets.AskFetchTransactions(
        offset, FETCH_COUNT, min_tid, max_tid, tid_list))
def fetchTransactions(self, min_tid=None):
    assert self.current_node.getConnection().isClient(), self.current_node
    offset = self.current_partition
    p = self.partition_dict[offset]
    if min_tid:
        # More than one chunk ? This could be a full replication so avoid
        # restarting from the beginning by committing now.
        self.app.dm.commit()
        p.next_trans = min_tid
    else:
        try:
            addr, name = self.source_dict[offset]
        except KeyError:
            pass
        else:
            if addr != self.current_node.getAddress():
                return self.abort()
        min_tid = p.next_trans
        self.replicate_tid = self.replicate_dict.pop(offset)
        logging.debug("starting replication of <partition=%u"
            " min_tid=%s max_tid=%s> from %r", offset, dump(min_tid),
            dump(self.replicate_tid), self.current_node)
    max_tid = self.replicate_tid
    tid_list = self.app.dm.getReplicationTIDList(min_tid, max_tid,
        FETCH_COUNT, offset)
    self._conn_msg_id = self.current_node.ask(Packets.AskFetchTransactions(
        offset, FETCH_COUNT, min_tid, max_tid, tid_list))
def abort(self, ttid, even_if_locked=False):
    """
    Abort a transaction
    Releases locks held on all transaction objects, deletes Transaction
    instance, and executes queued events.
    Note: does not alter persistent content.
    """
    if ttid not in self._transaction_dict:
        assert not even_if_locked
        # See how the master processes AbortTransaction from the client.
        return
    logging.debug('Abort TXN %s', dump(ttid))
    transaction = self._transaction_dict[ttid]
    locked = transaction.tid
    # if the transaction is locked, ensure we can drop it
    if locked:
        if not even_if_locked:
            return
    else:
        dm = self._app.dm
        dm.abortTransaction(ttid)
        dm.releaseData([x[1] for x in transaction.store_dict.itervalues()],
                       True)
        dm.commit()
    # unlock any object
    for oid in transaction.serial_dict:
        if locked:
            lock_ttid = self._load_lock_dict.pop(oid, None)
            assert lock_ttid in (ttid, None), ('Transaction %s tried'
                ' to release the lock on oid %s, but it was held by %s'
                % (dump(ttid), dump(oid), dump(lock_ttid)))
        try:
            write_locking_tid = self._store_lock_dict[oid]
        except KeyError:
            # Lockless store (we are replicating this partition),
            # or unresolved deadlock.
            continue
        if ttid != write_locking_tid:
            if __debug__:
                other = self._transaction_dict[write_locking_tid]
                x = (oid, ttid, write_locking_tid,
                     self._replicated, transaction.lockless)
            lockless = oid in transaction.lockless
            assert oid in other.serial_dict and lockless == bool(
                self._replicated.get(self.getPartition(oid))), x
            if not lockless:
                assert not locked, x
                continue # unresolved deadlock
            # Several lockless stores for this oid and among them,
            # a higher ttid is still pending.
            assert transaction < other, x
        del self._store_lock_dict[oid]
    # remove the transaction
    del self._transaction_dict[ttid]
    if self._replicated:
        self._notifyReplicated()
    # some locks were released, some pending locks may now succeed
    self.read_queue.executeQueuedEvents()
    self.executeQueuedEvents()
def __repr__(self):
    error = self.error
    return ("<%s ttid=%s locking_tid=%s voted=%u"
            " #queue=%s #writing=%s #written=%s%s>") % (
        self.__class__.__name__,
        dump(self.ttid),
        dump(self.locking_tid),
        self.voted,
        len(self.queue._queue),
        len(self.data_dict),
        len(self.cache_dict),
        ' error=%r' % error if error else '')
def logDelay(self, ttid, locked, oid_serial):
    if self._delayed.get(oid_serial) != locked:
        if self._delayed:
            self._delayed[oid_serial] = locked
        else:
            self._delayed = {oid_serial: locked}
        logging.info('Lock delayed for %s:%s by %s',
                     dump(oid_serial[0]), dump(ttid), dump(locked))
def abort(self, ttid, uuid):
    """
    Abort a transaction
    """
    logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
    if self[ttid].isPrepared():
        raise ProtocolError("commit already requested for ttid %s"
                            % dump(ttid))
    del self[ttid]
def __repr__(self):
    return "<%s(ttid=%r, tid=%r, uuid=%r, locked=%r, age=%.2fs) at 0x%x>" \
        % (self.__class__.__name__,
           dump(self._ttid),
           dump(self._tid),
           uuid_str(self._uuid),
           self.isLocked(),
           time() - self._birth,
           id(self))
def log(self):
    logging.info("Transactions:")
    for txn in self._transaction_dict.values():
        logging.info(' %r', txn)
    logging.info(' Read locks:')
    for oid, ttid in self._load_lock_dict.items():
        logging.info(' %r by %r', dump(oid), dump(ttid))
    logging.info(' Write locks:')
    for oid, ttid in self._store_lock_dict.items():
        logging.info(' %r by %r', dump(oid), dump(ttid))
def unlock(self, ttid):
    """
    Unlock transaction
    """
    tid = self._transaction_dict[ttid].getTID()
    logging.debug('Unlock TXN %s (ttid=%s)', dump(tid), dump(ttid))
    dm = self._app.dm
    dm.unlockTransaction(tid, ttid)
    self._app.em.setTimeout(time() + 1, dm.deferCommit())
    self.abort(ttid, even_if_locked=True)
def askHasLock(self, conn, ttid, oid):
    locking_tid = self.app.tm.getLockingTID(oid)
    logging.info('%r check lock of %r:%r', conn, dump(ttid), dump(oid))
    if locking_tid is None:
        state = LockState.NOT_LOCKED
    elif locking_tid is ttid:
        state = LockState.GRANTED
    else:
        state = LockState.GRANTED_TO_OTHER
    conn.answer(Packets.AnswerHasLock(oid, state))
def checkRange(self, conn, *args):
    if self.conn_dict.get(conn, self) != conn.getPeerId():
        # Ignore answers to old requests,
        # because we did nothing to cancel them.
        logging.info("ignored AnswerCheck*Range%r", args)
        return
    self.conn_dict[conn] = args
    answer_set = set(self.conn_dict.itervalues())
    if len(answer_set) > 1:
        for answer in answer_set:
            if type(answer) is not tuple:
                return
        # TODO: Automatically tell corrupted cells to fix their data
        #       if we know a good source.
        #       For the moment, tell master to put them in CORRUPTED state
        #       and keep up checking if useful.
        uuid = self.app.uuid
        args = None if self.source is None else self.conn_dict[
            None if self.source.getUUID() == uuid
                 else self.source.getConnection()]
        uuid_list = []
        for conn, answer in self.conn_dict.items():
            if answer != args:
                del self.conn_dict[conn]
                if conn is None:
                    uuid_list.append(uuid)
                else:
                    uuid_list.append(conn.getUUID())
                    self.app.closeClient(conn)
        p = Packets.NotifyPartitionCorrupted(self.partition, uuid_list)
        self.app.master_conn.send(p)
        if len(self.conn_dict) <= 1:
            logging.warning("check of partition %u aborted", self.partition)
            self.queue.clear()
            self._nextPartition()
            return
    try:
        count, _, max_tid = args
    except ValueError: # AnswerCheckSerialRange
        count, _, self.next_tid, _, max_oid = args
        if count < CHECK_COUNT:
            logging.debug("partition %u checked from %s to %s",
                self.partition, dump(self.min_tid), dump(self.max_tid))
            self._nextPartition()
            return
        self.next_oid = add64(max_oid, 1)
    else: # AnswerCheckTIDRange
        if count < CHECK_COUNT:
            self.next_tid = self.min_tid
            self.next_oid = ZERO_OID
        else:
            self.next_tid = add64(max_tid, 1)
    self._nextRange()
def askStorage(conn, packet):
    tid, next_tid, compression, checksum, data, data_tid \
        = self._askStorage(conn, packet)
    if data or checksum != ZERO_HASH:
        if checksum != makeChecksum(data):
            logging.error('wrong checksum from %s for oid %s',
                          conn, dump(oid))
            raise NEOStorageReadRetry(False)
        return (decompress_list[compression](data),
                tid, next_tid, data_tid)
    raise NEOStorageCreationUndoneError(dump(oid))
def notifyReplicationDone(self, node, offset, tid):
    app = self.app
    cell = app.pt.getCell(offset, node.getUUID())
    tid_list = self.tid_list[offset]
    if tid_list: # may be empty if the cell is out-of-date
                 # or if we're not fully initialized
        if tid < tid_list[0]:
            cell.replicating = tid
        else:
            try:
                tid = add64(tid_list[bisect(tid_list, tid)], -1)
            except IndexError:
                last_tid = app.getLastTransaction()
                if tid < last_tid:
                    tid = last_tid
                    node.notify(Packets.Replicate(tid, '', {offset: None}))
    logging.debug("partition %u: updating backup_tid of %r to %s",
                  offset, cell, dump(tid))
    cell.backup_tid = tid
    # Forget tids we won't need anymore.
    cell_list = app.pt.getCellList(offset, readable=True)
    del tid_list[:bisect(tid_list, min(x.backup_tid for x in cell_list))]
    primary_node = self.primary_partition_dict.get(offset)
    primary = primary_node is node
    result = None if primary else app.pt.setUpToDate(node, offset)
    assert cell.isReadable()
    if result: # was out-of-date
        if primary_node is not None:
            max_tid, = [x.backup_tid for x in cell_list
                        if x.getNode() is primary_node]
            if tid < max_tid:
                cell.replicating = max_tid
                logging.debug(
                    "ask %s to replicate partition %u up to %s from %s",
                    uuid_str(node.getUUID()), offset,
                    dump(max_tid), uuid_str(primary_node.getUUID()))
                node.notify(Packets.Replicate(max_tid, '',
                    {offset: primary_node.getAddress()}))
    else:
        if app.getClusterState() == ClusterStates.BACKINGUP:
            self.triggerBackup(node)
        if primary:
            # Notify secondary storages that they can replicate from
            # primary ones, even if they are already replicating.
            p = Packets.Replicate(tid, '', {offset: node.getAddress()})
            for cell in cell_list:
                if max(cell.backup_tid, cell.replicating) < tid:
                    cell.replicating = tid
                    logging.debug(
                        "ask %s to replicate partition %u up to %s from %s",
                        uuid_str(cell.getUUID()), offset,
                        dump(tid), uuid_str(node.getUUID()))
                    cell.getNode().notify(p)
    return result
def abort(self, ttid, uuid):
    """
    Abort a transaction
    """
    logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
    txn = self[ttid]
    if txn.isPrepared():
        raise ProtocolError("commit already requested for ttid %s"
                            % dump(ttid))
    del self[ttid]
    return txn._notification_set
def log(self):
    logging.info("Transactions:")
    for ttid, txn in self._transaction_dict.iteritems():
        logging.info(' %s %r', dump(ttid), txn)
    logging.info(' Read locks:')
    for oid, ttid in self._load_lock_dict.iteritems():
        logging.info(' %s by %s', dump(oid), dump(ttid))
    logging.info(' Write locks:')
    for oid, ttid in self._store_lock_dict.iteritems():
        logging.info(' %s by %s', dump(oid), dump(ttid))
    self.logQueuedEvents()
    self.read_queue.logQueuedEvents()
def checkRange(self, conn, *args):
    if self.conn_dict.get(conn, self) != conn.getPeerId():
        # Ignore answers to old requests,
        # because we did nothing to cancel them.
        logging.info("ignored AnswerCheck*Range%r", args)
        return
    self.conn_dict[conn] = args
    answer_set = set(self.conn_dict.itervalues())
    if len(answer_set) > 1:
        for answer in answer_set:
            if type(answer) is not tuple:
                return
        # TODO: Automatically tell corrupted cells to fix their data
        #       if we know a good source.
        #       For the moment, tell master to put them in CORRUPTED state
        #       and keep up checking if useful.
        uuid = self.app.uuid
        args = None if self.source is None else self.conn_dict[
            None if self.source.getUUID() == uuid
                 else self.source.getConnection()]
        uuid_list = []
        for conn, answer in self.conn_dict.items():
            if answer != args:
                del self.conn_dict[conn]
                if conn is None:
                    uuid_list.append(uuid)
                else:
                    uuid_list.append(conn.getUUID())
                    self.app.closeClient(conn)
        p = Packets.NotifyPartitionCorrupted(self.partition, uuid_list)
        self.app.master_conn.notify(p)
        if len(self.conn_dict) <= 1:
            logging.warning("check of partition %u aborted", self.partition)
            self.queue.clear()
            self._nextPartition()
            return
    try:
        count, _, max_tid = args
    except ValueError: # AnswerCheckSerialRange
        count, _, self.next_tid, _, max_oid = args
        if count < CHECK_COUNT:
            logging.debug("partition %u checked from %s to %s",
                self.partition, dump(self.min_tid), dump(self.max_tid))
            self._nextPartition()
            return
        self.next_oid = add64(max_oid, 1)
    else: # AnswerCheckTIDRange
        if count < CHECK_COUNT:
            self.next_tid = self.min_tid
            self.next_oid = ZERO_OID
        else:
            self.next_tid = add64(max_tid, 1)
    self._nextRange()
def deadlock(self, storage_id, ttid, locking_tid):
    try:
        txn = self._ttid_dict[ttid]
    except KeyError:
        return
    if txn.locking_tid <= locking_tid:
        client = txn.getNode()
        txn.locking_tid = locking_tid = self._nextTID()
        logging.info('Deadlock avoidance triggered by %s for %s:'
            ' new locking tid for TXN %s is %s', uuid_str(storage_id),
            uuid_str(client.getUUID()), dump(ttid), dump(locking_tid))
        client.send(Packets.NotifyDeadlock(ttid, locking_tid))
class ClientOperationHandler(BaseHandler):

    def askTransactionInformation(self, conn, tid):
        t = self.app.dm.getTransaction(tid)
        if t is None:
            p = Errors.TidNotFound('%s does not exist' % dump(tid))
        else:
            p = Packets.AnswerTransactionInformation(tid, t[1], t[2], t[3],
                                                     t[4], t[0])
        conn.answer(p)

    def getEventQueue(self):
        # for read rpc
        return self.app.tm.read_queue

    def askObject(self, conn, oid, serial, tid):
        app = self.app
        if app.tm.loadLocked(oid):
            raise DelayEvent
        o = app.dm.getObject(oid, serial, tid)
        try:
            serial, next_serial, compression, checksum, data, data_serial = o
        except TypeError:
            p = (Errors.OidDoesNotExist if o is None else
                 Errors.OidNotFound)(dump(oid))
        else:
            if checksum is None:
                checksum = ZERO_HASH
                data = ''
            p = Packets.AnswerObject(oid, serial, next_serial, compression,
                                     checksum, data, data_serial)
        conn.answer(p)

    def askStoreTransaction(self, conn, ttid, *txn_info):
        self.app.tm.register(conn, ttid)
        self.app.tm.vote(ttid, txn_info)
        conn.answer(Packets.AnswerStoreTransaction())

    def askVoteTransaction(self, conn, ttid):
        self.app.tm.vote(ttid)
        conn.answer(Packets.AnswerVoteTransaction())

    def _askStoreObject(self, conn, oid, serial, compression, checksum, data,
                        data_serial, ttid, request_time):
        try:
            self.app.tm.storeObject(ttid, serial, oid, compression,
                                    checksum, data, data_serial)
        except ConflictError, err: # resolvable or not
            conn.answer(Packets.AnswerStoreObject(err.tid))
            return
        except NonReadableCell:
            logging.info('Ignore store of %s:%s by %s: unassigned partition',
                         dump(oid), dump(serial), dump(ttid))
def unlock(self, ttid):
    """
    Unlock transaction
    """
    try:
        tid = self._transaction_dict[ttid].tid
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))
    logging.debug('Unlock TXN %s (ttid=%s)', dump(tid), dump(ttid))
    dm = self._app.dm
    dm.unlockTransaction(tid, ttid)
    self._app.em.setTimeout(time() + 1, dm.deferCommit())
    self.abort(ttid, even_if_locked=True)
def prepare(self, app, ttid, oid_list, checked_list, msg_id):
    """
    Prepare a transaction to be finished
    """
    txn = self[ttid]
    pt = app.pt
    failed = txn._failed
    if failed and not pt.operational(failed):
        return None, None
    ready = app.getStorageReadySet(txn._storage_readiness)
    getPartition = pt.getPartition
    partition_set = set(map(getPartition, oid_list))
    partition_set.update(map(getPartition, checked_list))
    partition_set.add(getPartition(ttid))
    node_list = []
    uuid_set = set()
    for partition in partition_set:
        for cell in pt.getCellList(partition):
            node = cell.getNode()
            if node.isIdentified():
                uuid = node.getUUID()
                if uuid in uuid_set:
                    continue
                if uuid in failed:
                    # This will commit a new PT with outdated cells before
                    # locking the transaction, which is important during
                    # the verification phase.
                    node.getConnection().close()
                elif uuid in ready:
                    uuid_set.add(uuid)
                    node_list.append(node)
    # A node that was not ready at the beginning of the transaction
    # can't have readable cells. And if we're still operational without
    # the 'failed' nodes, then there must still be 1 node in 'ready'
    # that is UP.
    assert node_list, (ready, failed)
    # maybe not the fastest but _queue should often be small
    if ttid in self._queue:
        tid = ttid
    else:
        tid = self._nextTID(ttid, pt.getPartitions())
        self._queue.append(ttid)
    logging.debug('Finish TXN %s for %s (was %s)',
                  dump(tid), txn.getNode(), dump(ttid))
    txn.prepare(tid, oid_list, uuid_set, msg_id)
    # check if greater and foreign OID was stored
    if oid_list:
        self.setLastOID(max(oid_list))
    return tid, node_list
def _acceptIdentification(self, node, uuid, num_partitions,
        num_replicas, your_uuid, primary, known_master_list):
    app = self.app
    # Register new master nodes.
    found = False
    conn_address = node.getAddress()
    for node_address, node_uuid in known_master_list:
        if node_address == conn_address:
            assert uuid == node_uuid, (dump(uuid), dump(node_uuid))
            found = True
        n = app.nm.getByAddress(node_address)
        if n is None:
            n = app.nm.createMaster(address=node_address)
        if node_uuid is not None and n.getUUID() != node_uuid:
            n.setUUID(node_uuid)
    assert found, (node, dump(uuid), known_master_list)
    conn = node.getConnection()
    if primary is not None:
        primary_node = app.nm.getByAddress(primary)
        if primary_node is None:
            # I don't know such a node. Probably this information
            # is old. So ignore it.
            logging.warning('Unknown primary master: %s. Ignoring.', primary)
            return
        else:
            if app.trying_master_node is not primary_node:
                app.trying_master_node = None
                conn.close()
            app.primary_master_node = primary_node
    else:
        if app.primary_master_node is not None:
            # The primary master node is not a primary master node
            # any longer.
            app.primary_master_node = None
        app.trying_master_node = None
        conn.close()
        return
    # the master must give an UUID
    if your_uuid is None:
        raise ProtocolError('No UUID supplied')
    app.uuid = your_uuid
    logging.info('Got an UUID: %s', dump(app.uuid))
    # Always create partition table
    app.pt = PartitionTable(num_partitions, num_replicas)
def triggerBackup(self, node):
    tid_list = self.tid_list
    tid = self.app.getLastTransaction()
    replicate_list = []
    for offset, cell in self.app.pt.iterNodeCell(node):
        max_tid = tid_list[offset]
        if max_tid and self.primary_partition_dict[offset] is node and \
           max(cell.backup_tid, cell.replicating) < max_tid[-1]:
            cell.replicating = tid
            replicate_list.append(offset)
    if not replicate_list:
        return
    getCellList = self.pt.getCellList
    source_dict = {}
    address_set = set()
    for offset in replicate_list:
        cell_list = getCellList(offset, readable=True)
        random.shuffle(cell_list)
        assert cell_list, offset
        for cell in cell_list:
            addr = cell.getAddress()
            if addr in address_set:
                break
        else:
            address_set.add(addr)
        source_dict[offset] = addr
        logging.debug("ask %s to replicate partition %u up to %s from %r",
                      uuid_str(node.getUUID()), offset, dump(tid), addr)
    node.send(Packets.Replicate(tid, self.name, source_dict))
def triggerBackup(self, node):
    tid_list = self.tid_list
    tid = self.app.getLastTransaction()
    replicate_list = []
    for offset, cell in self.app.pt.iterNodeCell(node):
        max_tid = tid_list[offset]
        if max_tid and self.primary_partition_dict[offset] is node and \
           max(cell.backup_tid, cell.replicating) < max_tid[-1]:
            cell.replicating = tid
            replicate_list.append(offset)
    if not replicate_list:
        return
    getCellList = self.pt.getCellList
    source_dict = {}
    address_set = set()
    for offset in replicate_list:
        cell_list = getCellList(offset, readable=True)
        random.shuffle(cell_list)
        assert cell_list, offset
        for cell in cell_list:
            addr = cell.getAddress()
            if addr in address_set:
                break
        else:
            address_set.add(addr)
        source_dict[offset] = addr
        logging.debug("ask %s to replicate partition %u up to %s from %r",
                      uuid_str(node.getUUID()), offset, dump(tid), addr)
    node.getConnection().notify(Packets.Replicate(
        tid, self.name, source_dict))
def rebase(self, conn, ttid, locking_tid):
    self.register(conn, ttid)
    transaction = self._transaction_dict[ttid]
    if transaction.voted:
        raise ProtocolError("TXN %s already voted" % dump(ttid))
    # First, get a set copy of serial_dict before _rebase locks oids.
    lock_set = set(transaction.serial_dict)
    self._rebase(transaction, transaction.locking_tid != MAX_TID and ttid,
                 locking_tid)
    if transaction.locking_tid == MAX_TID:
        # New deadlock. There's no point rebasing objects now.
        return ()
    # We return all oids that can't be relocked trivially
    # (the client will use RebaseObject for these oids).
    lock_set -= transaction.lockless # see comment in _rebase
    recheck_set = lock_set.intersection(self._store_lock_dict)
    lock_set -= recheck_set
    for oid in lock_set:
        try:
            serial = transaction.serial_dict[oid]
        except KeyError:
            # An oid was already being rebased and delayed,
            # and it got a conflict during the above call to _rebase.
            continue
        try:
            self.lockObject(ttid, serial, oid)
        except ConflictError:
            recheck_set.add(oid)
    return recheck_set
def lock(self, ttid, tid):
    """
    Lock a transaction
    """
    logging.debug('Lock TXN %s (ttid=%s)', dump(tid), dump(ttid))
    try:
        transaction = self._transaction_dict[ttid]
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))
    assert transaction.tid is None, dump(transaction.tid)
    assert ttid <= tid, (ttid, tid)
    transaction.tid = tid
    self._load_lock_dict.update(
        dict.fromkeys(transaction.store_dict, ttid))
    if transaction.voted == 2:
        self._app.dm.lockTransaction(tid, ttid)
def answerStoreObject(self, conn, conflicting, oid, serial):
    txn_context = self.app.getHandlerData()
    object_stored_counter_dict = txn_context[
        'object_stored_counter_dict'][oid]
    if conflicting:
        # Warning: if a storage (S1) is much faster than another (S2), then
        # we may process entirely a conflict with S1 (i.e. we received the
        # answer to the store of the resolved object on S1) before we
        # receive the conflict answer from the first store on S2.
        logging.info('%r reports a conflict for %r with %r',
                     conn, dump(oid), dump(serial))
        # If this conflict is not already resolved, mark it for
        # resolution.
        if serial not in txn_context[
                'resolved_conflict_serial_dict'].get(oid, ()):
            if serial in object_stored_counter_dict and serial != ZERO_TID:
                raise NEOStorageError('Storages %s accepted object %s'
                    ' for serial %s but %s reports a conflict for it.' % (
                        map(dump, object_stored_counter_dict[serial]),
                        dump(oid), dump(serial), dump(conn.getUUID())))
            conflict_serial_dict = txn_context['conflict_serial_dict']
            conflict_serial_dict.setdefault(oid, set()).add(serial)
    else:
        uuid_set = object_stored_counter_dict.get(serial)
        if uuid_set is None: # store to first storage node
            object_stored_counter_dict[serial] = uuid_set = set()
            try:
                data = txn_context['data_dict'].pop(oid)
            except KeyError: # multiple undo
                assert txn_context['cache_dict'][oid] is None, oid
            else:
                if type(data) is str:
                    size = len(data)
                    txn_context['data_size'] -= size
                    size += txn_context['cache_size']
                    if size < self.app._cache._max_size:
                        txn_context['cache_size'] = size
                    else:
                        # Do not cache data past cache max size, as it
                        # would just flush it on tpc_finish. This also
                        # prevents memory errors for big transactions.
                        data = None
                txn_context['cache_dict'][oid] = data
        else: # replica
            assert oid not in txn_context['data_dict'], oid
        uuid_set.add(conn.getUUID())
def __getitem__(self, ttid):
    """
    Return the transaction object for this TID
    """
    try:
        return self._ttid_dict[ttid]
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))
def register(self, conn, ttid):
    """
    Register a transaction; it may already be registered
    """
    if ttid not in self._transaction_dict:
        uuid = conn.getUUID()
        logging.debug('Register TXN %s for %s', dump(ttid), uuid_str(uuid))
        self._transaction_dict[ttid] = Transaction(uuid, ttid)
def lock(self, ttid, tid):
    """
    Lock a transaction
    """
    logging.debug('Lock TXN %s (ttid=%s)', dump(tid), dump(ttid))
    try:
        transaction = self._transaction_dict[ttid]
    except KeyError:
        raise ProtocolError("unknown ttid %s" % dump(ttid))
    # remember that the transaction has been locked
    transaction.lock()
    self._load_lock_dict.update(
        dict.fromkeys(transaction.getOIDList(), ttid))
    # commit transaction and remember its definitive TID
    if transaction.has_trans:
        self._app.dm.lockTransaction(tid, ttid)
    transaction.setTID(tid)
def checkReplicas(self, conn, partition_dict, min_tid, max_tid):
    app = self.app
    pt = app.pt
    backingup = bool(app.backup_tid)
    if not max_tid:
        max_tid = pt.getCheckTid(partition_dict) if backingup else \
            app.getLastTransaction()
    if min_tid > max_tid:
        logging.warning("nothing to check: min_tid=%s > max_tid=%s",
                        dump(min_tid), dump(max_tid))
    else:
        getByUUID = app.nm.getByUUID
        node_set = set()
        for offset, source in partition_dict.iteritems():
            # XXX: For the moment, code checking replicas is unable to fix
            #      corrupted partitions (when a good cell is known)
            #      so only check readable ones.
            #      (see also Checker._nextPartition of storage)
            cell_list = pt.getCellList(offset, True)
            #cell_list = [cell for cell in pt.getCellList(offset)
            #                  if not cell.isOutOfDate()]
            if len(cell_list) + (backingup and not source) <= 1:
                continue
            for cell in cell_list:
                node = cell.getNode()
                if node in node_set:
                    break
            else:
                node_set.add(node)
            if source:
                source = '', getByUUID(source).getAddress()
            else:
                readable = [cell for cell in cell_list if cell.isReadable()]
                if 1 == len(readable) < len(cell_list):
                    source = '', readable[0].getAddress()
                elif backingup:
                    source = app.backup_app.name, random.choice(
                        app.backup_app.pt.getCellList(offset, readable=True)
                        ).getAddress()
                else:
                    source = '', None
            node.getConnection().notify(Packets.CheckPartition(
                offset, source, min_tid, max_tid))
    conn.answer(Errors.Ack(''))
def askTransactionInformation(self, conn, tid):
    t = self.app.dm.getTransaction(tid)
    if t is None:
        p = Errors.TidNotFound('%s does not exist' % dump(tid))
    else:
        p = Packets.AnswerTransactionInformation(tid, t[1], t[2], t[3],
                                                 bool(t[4]), t[0])
    conn.answer(p)
def askTransactionInformation(self, conn, tid):
    t = self.app.dm.getTransaction(tid)
    if t is None:
        p = Errors.TidNotFound('%s does not exist' % dump(tid))
    else:
        p = Packets.AnswerTransactionInformation(tid, t[1], t[2], t[3],
                                                 t[4], t[0])
    conn.answer(p)
def _loadFromStorage(self, oid, at_tid, before_tid):
    packet = Packets.AskObject(oid, at_tid, before_tid)
    for node, conn in self.cp.iterateForObject(oid, readable=True):
        try:
            tid, next_tid, compression, checksum, data, data_tid \
                = self._askStorage(conn, packet)
        except ConnectionClosed:
            continue
        if data or checksum != ZERO_HASH:
            if checksum != makeChecksum(data):
                logging.error('wrong checksum from %s for oid %s',
                              conn, dump(oid))
                continue
            return (decompress(data) if compression else data,
                    tid, next_tid, data_tid)
        raise NEOStorageCreationUndoneError(dump(oid))
    raise NEOStorageError("storage down or corrupted data")
def prepare(self, ttid, divisor, oid_list, uuid_list, msg_id):
    """
    Prepare a transaction to be finished
    """
    txn = self[ttid]
    # maybe not the fastest but _queue should often be small
    if ttid in self._queue:
        tid = ttid
    else:
        tid = self._nextTID(ttid, divisor)
        self._queue.append(ttid)
    logging.debug('Finish TXN %s for %s (was %s)',
                  dump(tid), txn.getNode(), dump(ttid))
    txn.prepare(tid, oid_list, uuid_list, msg_id)
    # check if greater and foreign OID was stored
    if oid_list:
        self.setLastOID(max(oid_list))
    return tid
def __repr__(self):
    return "<%s(client=%r, tid=%r, oids=%r, storages=%r, age=%.2fs) at %x>" % (
        self.__class__.__name__,
        self._node,
        dump(self._tid),
        map(dump, self._oid_list or ()),
        map(uuid_str, self._uuid_set or ()),
        time() - self._birth,
        id(self),
    )
def lock(self, ttid, uuid):
    """
    Set that a node has locked the transaction.
    If the transaction is completely locked, calls the function given at
    instantiation time.
    """
    logging.debug('Lock TXN %s for %s', dump(ttid), uuid_str(uuid))
    if self[ttid].lock(uuid) and self._queue[0] == ttid:
        # all storages are locked and we unlock the commit queue
        self._unlockPending()
def tpc_vote(self, transaction, tryToResolveConflict):
    """Store current transaction."""
    txn_context = self._txn_container.get(transaction)
    result = self.waitStoreResponses(txn_context, tryToResolveConflict)
    ttid = txn_context['ttid']
    # Store data on each node
    assert not txn_context['data_dict'], txn_context
    packet = Packets.AskStoreTransaction(ttid, str(transaction.user),
        str(transaction.description), dumps(transaction._extension),
        txn_context['cache_dict'])
    queue = txn_context['queue']
    trans_nodes = []
    for node, conn in self.cp.iterateForObject(ttid):
        logging.debug("voting transaction %s on %s", dump(ttid),
                      dump(conn.getUUID()))
        try:
            conn.ask(packet, queue=queue)
        except ConnectionClosed:
            continue
        trans_nodes.append(node)
    # check at least one storage node accepted
    if trans_nodes:
        involved_nodes = txn_context['involved_nodes']
        packet = Packets.AskVoteTransaction(ttid)
        for node in involved_nodes.difference(trans_nodes):
            conn = self.cp.getConnForNode(node)
            if conn is not None:
                try:
                    conn.ask(packet, queue=queue)
                except ConnectionClosed:
                    pass
        involved_nodes.update(trans_nodes)
        self.waitResponses(queue)
        txn_context['voted'] = None
        # We must not go further if connection to master was lost since
        # tpc_begin, to lower the probability of failing during tpc_finish.
        if 'error' in txn_context:
            raise NEOStorageError(txn_context['error'])
        return result
    logging.error('tpc_vote failed')
    raise NEOStorageError('tpc_vote failed')
def register(self, uuid, ttid):
    """
    Register a transaction; it may already be registered
    """
    logging.debug('Register TXN %s for %s', dump(ttid), uuid_str(uuid))
    transaction = self._transaction_dict.get(ttid, None)
    if transaction is None:
        transaction = Transaction(uuid, ttid)
        self._uuid_dict.setdefault(uuid, set()).add(transaction)
        self._transaction_dict[ttid] = transaction
    return transaction
def _nextTID(self, ttid=None, divisor=None):
    """
    Compute the next TID based on the current time and check collisions.
    Also, if ttid is not None, divisor is mandatory: adjust tid so that
    tid % divisor == ttid % divisor while preserving min_tid < tid.
    If ttid is None, divisor is ignored.
    When constraints allow, prefer decreasing generated TID, to avoid
    fast-forwarding to future dates.
    """
    tid = tidFromTime(time())
    min_tid = self._last_tid
    if tid <= min_tid:
        tid = addTID(min_tid, 1)
        # We know we won't have room to adjust by decreasing.
        try_decrease = False
    else:
        try_decrease = True
    if ttid is not None:
        assert isinstance(ttid, basestring), repr(ttid)
        assert isinstance(divisor, (int, long)), repr(divisor)
        ref_remainder = u64(ttid) % divisor
        remainder = u64(tid) % divisor
        if ref_remainder != remainder:
            if try_decrease:
                new_tid = addTID(tid, ref_remainder - divisor - remainder)
                assert u64(new_tid) % divisor == ref_remainder, (
                    dump(new_tid), ref_remainder)
                if new_tid <= min_tid:
                    new_tid = addTID(new_tid, divisor)
            else:
                if ref_remainder < remainder:
                    ref_remainder += divisor
                new_tid = addTID(tid, ref_remainder - remainder)
            assert min_tid < new_tid, (dump(min_tid), dump(tid), dump(new_tid))
            tid = new_tid
    self._last_tid = tid
    return self._last_tid
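# Hedged sketch (not part of NEO): the remainder adjustment performed by
# _nextTID above, with plain ints standing in for packed 8-byte TIDs (so
# +1/-1 replace addTID/u64). The name next_tid_demo is hypothetical.
def next_tid_demo(clock_tid, min_tid, ttid, divisor):
    if clock_tid <= min_tid:
        tid = min_tid + 1
        try_decrease = False  # no room to adjust by decreasing
    else:
        tid = clock_tid
        try_decrease = True
    ref_remainder = ttid % divisor
    remainder = tid % divisor
    if ref_remainder != remainder:
        if try_decrease:
            # Step back to the previous value with the wanted remainder...
            new_tid = tid + ref_remainder - divisor - remainder
            if new_tid <= min_tid:
                new_tid += divisor  # ...unless that crosses min_tid
        else:
            if ref_remainder < remainder:
                ref_remainder += divisor
            new_tid = tid + ref_remainder - remainder
        assert min_tid < new_tid and new_tid % divisor == ttid % divisor
        tid = new_tid
    return tid

# The clock gives 1005, the last allocated tid is 1000 and the client's ttid
# has remainder 3 modulo 4: the generator steps back to 1003 (1003 % 4 == 3),
# avoiding a fast-forward to the next matching slot in the future.
assert next_tid_demo(1005, 1000, 7, 4) == 1003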
def finish():
    if tid:
        self.storeTransaction(tid, object_list, (
            (x[0] for x in object_list),
            str(txn.user), str(txn.description),
            cPickle.dumps(txn.extension), False, tid), False)
        self.releaseData(data_id_list)
        logging.debug("TXN %s imported (user=%r, desc=%r, len(oid)=%s)",
                      util.dump(tid), txn.user, txn.description,
                      len(object_list))
        del object_list[:], data_id_list[:]
        if self._last_commit + 1 < time.time():
            self.commit()
        self.zodb_tid = u64(tid)
def askObjectHistory(self, conn, oid, first, last):
    if first >= last:
        raise ProtocolError('invalid offsets')
    app = self.app
    if app.tm.loadLocked(oid):
        # Delay the response.
        app.queueEvent(self.askObjectHistory, conn, (oid, first, last))
        return
    history_list = app.dm.getObjectHistory(oid, first, last - first)
    if history_list is None:
        p = Errors.OidNotFound(dump(oid))
    else:
        p = Packets.AnswerObjectHistory(oid, history_list)
    conn.answer(p)
def finish(self):
    offset = self.current_partition
    tid = self.replicate_tid
    del self.current_partition, self.replicate_tid
    p = self.partition_dict[offset]
    p.next_obj = add64(tid, 1)
    self.updateBackupTID()
    if not p.max_ttid:
        p = Packets.NotifyReplicationDone(offset, tid)
        self.app.master_conn.notify(p)
    logging.debug("partition %u replicated up to %s from %r",
                  offset, dump(tid), self.current_node)
    self.getCurrentConnection().setReconnectionNoDelay()
    self._nextPartition()
def vote(self, ttid, txn_info=None):
    """
    Store transaction information received from client node
    """
    logging.debug('Vote TXN %s', dump(ttid))
    transaction = self._transaction_dict[ttid]
    object_list = transaction.getObjectList()
    if txn_info:
        user, desc, ext, oid_list = txn_info
        txn_info = oid_list, user, desc, ext, False, ttid
        transaction.has_trans = True
    # store metadata to temporary table
    dm = self._app.dm
    dm.storeTransaction(ttid, object_list, txn_info)
    dm.commit()
def askObjectUndoSerial(self, conn, ttid, ltid, undone_tid, oid_list):
    app = self.app
    findUndoTID = app.dm.findUndoTID
    getObjectFromTransaction = app.tm.getObjectFromTransaction
    object_tid_dict = {}
    for oid in oid_list:
        current_serial, undo_serial, is_current = findUndoTID(oid, ttid,
            ltid, undone_tid, getObjectFromTransaction(ttid, oid))
        if current_serial is None:
            p = Errors.OidNotFound(dump(oid))
            break
        object_tid_dict[oid] = (current_serial, undo_serial, is_current)
    else:
        p = Packets.AnswerObjectUndoSerial(object_tid_dict)
    conn.answer(p)
def stop(self):
    # Close any open connection to an upstream storage,
    # possibly aborting current replication.
    node = self.current_node
    if node is not None is node.getUUID():
        self.cancel()
    # Cancel all replication orders from upstream cluster.
    for offset in self.replicate_dict.keys():
        addr, name = self.source_dict.get(offset, (None, None))
        if name:
            tid = self.replicate_dict.pop(offset)
            logging.info('cancel replication of partition %u from %r'
                         ' up to %s', offset, addr, dump(tid))
    # Make UP_TO_DATE cells really UP_TO_DATE
    self._nextPartition()
def askObject(self, conn, oid, serial, tid):
    app = self.app
    if app.tm.loadLocked(oid):
        # Delay the response.
        app.queueEvent(self.askObject, conn, (oid, serial, tid))
        return
    o = app.dm.getObject(oid, serial, tid)
    try:
        serial, next_serial, compression, checksum, data, data_serial = o
    except TypeError:
        p = (Errors.OidDoesNotExist if o is None else
             Errors.OidNotFound)(dump(oid))
    else:
        if checksum is None:
            checksum = ZERO_HASH
            data = ''
        p = Packets.AnswerObject(oid, serial, next_serial, compression,
                                 checksum, data, data_serial)
    conn.answer(p)