def _find_missing_references_from_pickles(destination, pickles, permanently_gone):
    # Return a set of objects missing from the database given the
    # pickle states of other objects.
    # *permanently_gone* is a set of oid byte strings that are
    # known to be missing and shouldn't be investigated and returned.
    oids = []
    for pickle in pickles:
        # Support zc.zlibstorage wrappers.
        if pickle.startswith(b'.z'):
            pickle = zlib.decompress(pickle[2:])
        get_oids_referenced_by_pickle(pickle, oids)

    logger.info(
        "Given %d pickles, there are %d unique references.",
        len(pickles), len(oids)
    )

    missing_oids = OidSet()
    destination.prefetch(oids)
    for oid in oids:
        if oid in permanently_gone:
            continue
        try:
            state, tid = destination.load(oid, b'')
            if is_blob_record(state):
                destination.loadBlob(oid, tid)
        except POSKeyError:
            missing_oids.add(bytes8_to_int64(oid))

    logger.info(
        "Given %d pickles, there are %d missing references.",
        len(pickles), len(missing_oids)
    )
    return missing_oids
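# A minimal usage sketch, assuming ``source`` and ``destination`` are opened
# ZODB storages and the helper names above come from this module. It collects
# the pickle state of every record in the source and asks the destination
# which referenced oids it cannot load; ``permanently_gone`` starts empty.
def _sketch_find_missing(source, destination):
    pickles = []
    for txn in source.iterator():
        for record in txn:
            if record.data:
                pickles.append(record.data)
    return _find_missing_references_from_pickles(
        destination, pickles, permanently_gone=set())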
def __copy_transaction(self, other, trans):
    # Originally adapted from ZODB.blob.BlobStorageMixin
    tpc = self.tpc
    num_txn_records = 0
    txn_data_size = 0
    num_blobs = 0

    tmp_blobs_to_rm = []

    tpc.tpc_begin(trans, trans.tid, trans.status)
    for record in trans:
        num_txn_records += 1
        if record.data:
            txn_data_size += len(record.data)

        blobfile = None
        if is_blob_record(record.data):
            try:
                blobfile = other.openCommittedBlobFile(
                    record.oid, record.tid)
            except POSKeyError:
                logger.exception("Failed to open blob to copy")

        if blobfile is not None:
            fd, name = tempfile.mkstemp(
                suffix='.tmp',
                dir=self.blobhelper.temporaryDirectory())
            tmp_blobs_to_rm.append(name)
            logger.log(TRACE, "Copying %s to temporary blob file %s for upload",
                       blobfile, name)
            with os.fdopen(fd, 'wb') as target:
                # If we don't get the length, ``copy_blob`` will.
                old_pos = blobfile.tell()
                blobfile.seek(0, 2)
                length = blobfile.tell()
                blobfile.seek(old_pos)
                copy_blob(blobfile, target, length)
                txn_data_size += length
            blobfile.close()
            self.restore.restoreBlob(record.oid, record.tid, record.data,
                                     name, record.data_txn, trans)
        else:
            self.restore.restore(record.oid, record.tid, record.data,
                                 '', record.data_txn, trans)

    tpc.tpc_vote(trans)
    tpc.tpc_finish(trans)

    num_blobs = len(tmp_blobs_to_rm)
    if num_blobs:
        for tmp_blob in tmp_blobs_to_rm:
            logger.log(TRACE, "Removing temporary blob file %s", tmp_blob)
            try:
                os.unlink(tmp_blob)
            except OSError:
                pass

    return num_txn_records, txn_data_size, num_blobs
def restore_one(self, active_txn_meta, oid, tid, data):
    # The signature for both ``restore`` and ``restoreBlob``
    # is:
    #
    #   (oid, serial, data, (version|blobfilename), prev_txn, txn)
    #
    # Where ``txn`` is the TransactionMetaData object
    # originally passed to ``tpc_begin``. It is only used to
    # check that the same object has been passed.
    #
    # ``prev_txn`` is not used but would come from ``record.data_txn``
    txn_data_size = len(data) if data else 0

    blobfile = None
    if is_blob_record(data):
        try:
            blobfile = self.storage.openCommittedBlobFile(
                oid, tid)
        except POSKeyError:  # pragma: no cover
            logger.exception("Failed to open blob to copy")

    # We may not be able to read the data after this.
    data = self.restore._crs_transform_record_data(data)

    if blobfile is not None:
        fd, name = tempfile.mkstemp(
            suffix='.tmp',
            dir=self.blobhelper.temporaryDirectory()
        )
        self.temp_blobs_to_rm.append(name)
        logger.log(
            TRACE,
            "Copying %s to temporary blob file %s for upload",
            blobfile, name)
        with os.fdopen(fd, 'wb') as target:
            # If we don't get the length, ``copy_blob`` will.
            old_pos = blobfile.tell()
            blobfile.seek(0, 2)
            length = blobfile.tell()
            blobfile.seek(old_pos)
            copy_blob(blobfile, target, length)
            txn_data_size += length
        blobfile.close()
        self.restore.restoreBlob(oid, tid, data,
                                 name, None, active_txn_meta)
    else:
        self.restore.restore(oid, tid, data,
                             '', None, active_txn_meta)

    return txn_data_size, blobfile is not None
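# A hedged sketch of how ``restore_one`` might be driven; it assumes ``self``
# is the same copier object, with the ``tpc`` and ``temp_blobs_to_rm``
# attributes used above. Each source transaction is replayed record by record
# inside one two-phase commit, and the temporary blob files are unlinked once
# the transaction is finished.
def _sketch_copy_one_transaction(self, trans):
    self.tpc.tpc_begin(trans, trans.tid, trans.status)
    copied_bytes = 0
    for record in trans:
        size, _was_blob = self.restore_one(
            trans, record.oid, record.tid, record.data)
        copied_bytes += size
    self.tpc.tpc_vote(trans)
    self.tpc.tpc_finish(trans)
    for tmp_blob in self.temp_blobs_to_rm:
        try:
            os.unlink(tmp_blob)
        except OSError:
            pass
    del self.temp_blobs_to_rm[:]
    return copied_bytes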
def compare_truncated(self, src, dest):
    """Confirm that dest is a truncated copy of src.

    The copy process should have dropped all old revisions of objects
    in src. Also note that dest does not retain transaction metadata.
    """
    src_objects = {}  # {oid: (tid, data, blob or None)}
    for txn in src.iterator():
        for rec in txn:
            if is_blob_record(rec.data):
                try:
                    fn = src.loadBlob(rec.oid, rec.tid)
                except ZODB.POSException.POSKeyError:
                    blob = None
                else:
                    with open(fn, 'rb') as f:
                        blob = f.read()
            else:
                blob = None
            src_objects[rec.oid] = (rec.tid, rec.data, blob)

    unchecked = set(src_objects)
    for txn in dest.iterator():
        for rec in txn:
            if is_blob_record(rec.data):
                try:
                    fn = dest.loadBlob(rec.oid, rec.tid)
                except ZODB.POSException.POSKeyError:
                    blob = None
                else:
                    with open(fn, 'rb') as f:
                        blob = f.read()
            else:
                blob = None
            dst_object = (rec.tid, rec.data, blob)
            src_object = src_objects[rec.oid]
            self.assertEqual(src_object, dst_object)
            unchecked.remove(rec.oid)

    self.assertEqual(len(unchecked), 0)
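# Usage sketch, with hypothetical fixtures: ``self.src`` and ``self.dest``
# would be test storages set up elsewhere, with ``self.dest`` history-free
# so that only the latest revision of each object survives the copy.
def _sketch_test_copy_truncates_history(self):
    self.dest.copyTransactionsFrom(self.src)
    self.compare_truncated(self.src, self.dest)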
def copyTransactionsFrom(self, other):
    for trans in other.iterator():
        self.tpc_begin(trans, trans.tid, trans.status)
        for record in trans:
            blobfilename = None
            if is_blob_record(record.data):
                try:
                    blobfilename = other.loadBlob(record.oid, record.tid)
                except POSKeyError:
                    pass
            if blobfilename is not None:
                fd, name = tempfile.mkstemp(
                    suffix='.tmp', dir=self.fshelper.temp_dir)
                os.close(fd)
                os.chmod(name, 0o640)
                with open(blobfilename, 'rb') as source, \
                     open(name, 'wb') as target:
                    utils.cp(source, target)
                self.restoreBlob(record.oid, record.tid, record.data,
                                 name, record.data_txn, trans)
            else:
                self.restore(record.oid, record.tid, record.data,
                             '', record.data_txn, trans)

        self.tpc_vote(trans)
        self.tpc_finish(trans)
def copyTransactionsFrom(self, other):
    # pylint:disable=too-many-locals
    # adapted from ZODB.blob.BlobStorageMixin
    begin_time = time.time()
    txnum = 0
    total_size = 0
    blobhelper = self.blobhelper
    tpc = self.tpc
    restore = self.restore

    logger.info("Counting the transactions to copy.")
    num_txns = 0
    for _ in other.iterator():
        num_txns += 1
    logger.info("Copying %d transactions", num_txns)

    for trans in other.iterator():
        txnum += 1
        num_txn_records = 0

        tpc.tpc_begin(trans, trans.tid, trans.status)
        for record in trans:
            blobfile = None
            if is_blob_record(record.data):
                try:
                    blobfile = other.openCommittedBlobFile(
                        record.oid, record.tid)
                except POSKeyError:
                    pass
            if blobfile is not None:
                fd, name = tempfile.mkstemp(
                    suffix='.tmp',
                    dir=blobhelper.temporaryDirectory())
                os.close(fd)
                with open(name, 'wb') as target:
                    copy_blob(blobfile, target)
                blobfile.close()
                restore.restoreBlob(record.oid, record.tid, record.data,
                                    name, record.data_txn, trans)
            else:
                restore.restore(record.oid, record.tid, record.data,
                                '', record.data_txn, trans)
            num_txn_records += 1
            if record.data:
                total_size += len(record.data)
        tpc.tpc_vote(trans)
        tpc.tpc_finish(trans)

        pct_complete = '%1.2f%%' % (txnum * 100.0 / num_txns)
        elapsed = time.time() - begin_time
        if elapsed:
            rate = total_size / 1e6 / elapsed
        else:
            rate = 0.0
        rate_str = '%1.3f' % rate

        logger.info("Copied tid %d,%5d records | %6s MB/s (%6d/%6d,%7s)",
                    bytes8_to_int64(trans.tid), num_txn_records,
                    rate_str, txnum, num_txns, pct_complete)

    elapsed = time.time() - begin_time
    logger.info(
        "All %d transactions copied successfully in %4.1f minutes.",
        txnum, elapsed / 60.0)
def copyTransactionsFrom(self, other):
    # pylint:disable=too-many-locals,too-many-statements,too-many-branches
    # adapted from ZODB.blob.BlobStorageMixin
    begin_time = time.time()
    log_at = begin_time + self.log_interval
    txnum = 0
    total_size = 0
    blobhelper = self.blobhelper
    tpc = self.tpc
    restore = self.restore

    logger.info("Counting the transactions to copy.")
    other_it = other.iterator()
    logger.debug("Opened the other iterator: %s", other_it)
    try:
        num_txns = len(other_it)
        if num_txns == 0:
            # Hmm, that shouldn't really be right, should it?
            # Try the other path.
            raise TypeError()
    except TypeError:
        logger.debug("Iterator %s doesn't support len()", other_it)
        num_txns = 0
        for _ in other_it:
            num_txns += 1
        other_it.close()
        other_it = other.iterator()
    logger.info("Copying %d transactions", num_txns)

    tmp_blobs_to_rm = []
    for trans in other_it:
        txnum += 1
        num_txn_records = 0

        tpc.tpc_begin(trans, trans.tid, trans.status)
        for record in trans:
            blobfile = None
            if is_blob_record(record.data):
                try:
                    blobfile = other.openCommittedBlobFile(
                        record.oid, record.tid)
                except POSKeyError:
                    pass
            if blobfile is not None:
                fd, name = tempfile.mkstemp(
                    suffix='.tmp',
                    dir=blobhelper.temporaryDirectory())
                tmp_blobs_to_rm.append(name)
                logger.debug("Copying %s to temporary blob file %s for upload",
                             blobfile, name)
                with os.fdopen(fd, 'wb') as target:
                    copy_blob(blobfile, target)
                blobfile.close()
                restore.restoreBlob(record.oid, record.tid, record.data,
                                    name, record.data_txn, trans)
            else:
                restore.restore(record.oid, record.tid, record.data,
                                '', record.data_txn, trans)
            num_txn_records += 1
            if record.data:
                total_size += len(record.data)
        tpc.tpc_vote(trans)
        tpc.tpc_finish(trans)
        for tmp_blob in tmp_blobs_to_rm:
            logger.debug("Removing temporary blob file %s", tmp_blob)
            try:
                os.unlink(tmp_blob)
            except OSError:
                pass
        del tmp_blobs_to_rm[:]

        if txnum % self.log_count == 0 and time.time() > log_at:
            now = time.time()
            log_at = now + self.log_interval
            pct_complete = '%1.2f%%' % (txnum * 100.0 / num_txns)
            elapsed = now - begin_time
            if elapsed:
                rate = total_size / 1e6 / elapsed
            else:
                rate = 0.0
            rate_str = '%1.3f' % rate
            logger.info(
                "Copied tid %d,%5d records | %6s MB/s (%6d/%6d,%7s)",
                bytes8_to_int64(trans.tid), num_txn_records,
                rate_str, txnum, num_txns, pct_complete)

    elapsed = time.time() - begin_time
    logger.info(
        "All %d transactions copied successfully in %4.1f minutes.",
        txnum, elapsed / 60.0)
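# The counting logic above generalizes to a small helper. This is a sketch,
# not part of the original module: it prefers ``len()`` when the transaction
# iterator supports it and otherwise falls back to exhausting the iterator
# and reopening it, mirroring the probe-and-fallback above (the zero-length
# re-check is omitted here for brevity).
def _sketch_count_transactions(storage):
    it = storage.iterator()
    try:
        num_txns = len(it)
    except TypeError:
        num_txns = sum(1 for _ in it)
        it.close()
        it = storage.iterator()
    return num_txns, it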
def compare_exact(self, storage1, storage2):
    """Confirm that storage1 and storage2 contain equivalent data"""
    eq = self.assertEqual
    missing = object()
    iter1 = storage1.iterator()
    iter2 = storage2.iterator()
    for txn1, txn2 in zip(iter1, iter2):
        eq(txn1.tid, txn2.tid)
        eq(txn1.status, txn2.status)
        eq(txn1.user, txn2.user)
        eq(txn1.description, txn2.description)

        # b/w compat on the 'extension' attribute
        e1 = getattr(txn1, 'extension', missing)
        if e1 is missing:
            # old attribute name
            e1 = txn1._extension
        e2 = getattr(txn2, 'extension', missing)
        if e2 is missing:
            # old attribute name
            e2 = txn2._extension
        eq(e1, e2)

        # compare the objects in the transaction, but disregard
        # the order of the objects and any duplicated records
        # since those are not important.
        recs1 = {r.oid: r for r in txn1}
        recs2 = {r.oid: r for r in txn2}
        eq(len(recs1), len(recs2))
        recs1 = sorted(iteritems(recs1))
        recs2 = sorted(iteritems(recs2))
        for (_oid1, rec1), (_oid2, rec2) in zip(recs1, recs2):
            eq(rec1.oid, rec2.oid)
            eq(rec1.tid, rec2.tid)
            eq(rec1.data, rec2.data)
            if is_blob_record(rec1.data):
                try:
                    fn1 = storage1.loadBlob(rec1.oid, rec1.tid)
                except ZODB.POSException.POSKeyError:
                    self.assertRaises(ZODB.POSException.POSKeyError,
                                      storage2.loadBlob, rec1.oid, rec1.tid)
                else:
                    fn2 = storage2.loadBlob(rec1.oid, rec1.tid)
                    self.assertNotEqual(fn1, fn2)
                    with open(fn1, 'rb') as f1, open(fn2, 'rb') as f2:
                        eq(f1.read(), f2.read())

    # Make sure there are no more transactions left in iter1 and iter2,
    # meaning the storages were the same length.
    try:
        next(iter1)
    except (IndexError, StopIteration):
        pass
    else:
        self.fail("storage1 has more records")
    try:
        next(iter2)
    except (IndexError, StopIteration):
        pass
    else:
        self.fail("storage2 has more records")

    iter1.close()
    iter2.close()
def compare_exact(self, storage1, storage2):
    """Confirm that storage1 and storage2 contain equivalent data"""
    eq = self.assertEqual
    missing = object()
    iter1 = storage1.iterator()
    iter2 = storage2.iterator()
    for txn1, txn2 in zip(iter1, iter2):
        eq(txn1.tid, txn2.tid)
        eq(txn1.status, txn2.status)
        eq(txn1.user, txn2.user)
        eq(txn1.description, txn2.description)

        # b/w compat on the 'extension' attribute
        e1 = getattr(txn1, 'extension', missing)
        if e1 is missing:
            # old attribute name
            e1 = txn1._extension
        e2 = getattr(txn2, 'extension', missing)
        if e2 is missing:
            # old attribute name
            e2 = txn2._extension
        eq(e1, e2)

        # compare the objects in the transaction, but disregard
        # the order of the objects and any duplicated records
        # since those are not important.
        recs1 = {r.oid: r for r in txn1}
        recs2 = {r.oid: r for r in txn2}
        eq(len(recs1), len(recs2))
        recs1 = sorted(iteritems(recs1))
        recs2 = sorted(iteritems(recs2))
        for (_oid1, rec1), (_oid2, rec2) in zip(recs1, recs2):
            eq(rec1.oid, rec2.oid)
            eq(rec1.tid, rec2.tid)
            eq(rec1.data, rec2.data)
            if is_blob_record(rec1.data):
                try:
                    fn1 = storage1.loadBlob(rec1.oid, rec1.tid)
                except ZODB.POSException.POSKeyError:
                    self.assertRaises(
                        ZODB.POSException.POSKeyError,
                        storage2.loadBlob, rec1.oid, rec1.tid)
                else:
                    fn2 = storage2.loadBlob(rec1.oid, rec1.tid)
                    self.assertNotEqual(fn1, fn2)
                    with open(fn1, 'rb') as f1, open(fn2, 'rb') as f2:
                        eq(f1.read(), f2.read())

    # Make sure there are no more transactions left in iter1 and iter2,
    # meaning the storages were the same length.
    try:
        next(iter1)
    except (IndexError, StopIteration):
        pass
    else:
        self.fail("storage1 has more records")
    try:
        next(iter2)
    except (IndexError, StopIteration):
        pass
    else:
        self.fail("storage2 has more records")

    iter1.close()
    iter2.close()
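# A hedged usage sketch: ``make_storage`` is a hypothetical factory producing
# two compatible test storages, and this helper would run inside the TestCase
# that defines ``compare_exact``. It copies everything across and then
# asserts the two storages are equivalent.
def _sketch_test_copy_roundtrip(self, make_storage):
    src = make_storage('source')
    dest = make_storage('destination')
    try:
        dest.copyTransactionsFrom(src)
        self.compare_exact(src, dest)
    finally:
        src.close()
        dest.close()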