Ejemplo n.º 1
0
def _find_missing_references_from_pickles(destination, pickles, permanently_gone):
    """
    Report the OIDs referenced by *pickles* that *destination* cannot load.

    Each state in *pickles* is scanned with
    ``get_oids_referenced_by_pickle``. A state that starts with ``.z`` is
    treated as a zc.zlibstorage wrapper and is decompressed before scanning.
    Every referenced OID is then loaded from *destination* (plus its blob
    when the loaded state is a blob record); an OID for which that raises
    ``POSKeyError`` is considered missing.

    *permanently_gone* is a set of oid byte strings that are already known
    to be missing; they are neither investigated nor returned.

    Returns an ``OidSet`` of the missing OIDs, each converted with
    ``bytes8_to_int64``.
    """
    referenced = []
    for state_pickle in pickles:
        # Support zc.zlibstorage wrappers: strip the marker and inflate.
        payload = (
            zlib.decompress(state_pickle[2:])
            if state_pickle.startswith(b'.z')
            else state_pickle
        )
        get_oids_referenced_by_pickle(payload, referenced)

    logger.info(
        "Given %d pickles, there are %d unique references.",
        len(pickles), len(referenced)
    )

    missing_oids = OidSet()
    destination.prefetch(referenced)
    # Lazily skip the OIDs the caller told us not to bother with.
    candidates = (oid for oid in referenced if oid not in permanently_gone)
    for oid in candidates:
        try:
            state, tid = destination.load(oid, b'')
            if is_blob_record(state):
                # A blob record is only usable when its blob loads too.
                destination.loadBlob(oid, tid)
        except POSKeyError:
            missing_oids.add(bytes8_to_int64(oid))

    logger.info(
        "Given %d pickles, there are %d missing references.",
        len(pickles), len(missing_oids)
    )
    return missing_oids
Ejemplo n.º 2
0
    def __copy_transaction(self, other, trans):
        """
        Copy the single transaction *trans*, read from *other*, into this
        storage.

        The transaction is begun on ``self.tpc``, every record is restored
        through ``self.restore`` and the transaction is then voted and
        finished. For a blob record whose committed blob file can be opened
        on *other*, the blob is first copied to a temporary file in the blob
        helper's temporary directory and handed to ``restoreBlob``; if the
        blob cannot be opened (``POSKeyError``) the failure is logged and the
        record is restored without a blob.

        Returns a tuple ``(num_txn_records, txn_data_size, num_blobs)``:
        the number of records seen, the summed length of the record data
        plus the copied blob lengths, and the number of temporary blob
        files that were created.
        """
        # Originally adapted from ZODB.blob.BlobStorageMixin
        tpc = self.tpc
        num_txn_records = 0
        txn_data_size = 0
        tmp_blobs_to_rm = []

        try:
            tpc.tpc_begin(trans, trans.tid, trans.status)
            for record in trans:
                num_txn_records += 1
                if record.data:
                    txn_data_size += len(record.data)

                blobfile = None
                if is_blob_record(record.data):
                    try:
                        blobfile = other.openCommittedBlobFile(
                            record.oid, record.tid)
                    except POSKeyError:
                        logger.exception("Failed to open blob to copy")

                if blobfile is None:
                    self.restore.restore(record.oid, record.tid, record.data,
                                         '', record.data_txn, trans)
                    continue

                try:
                    fd, name = tempfile.mkstemp(
                        suffix='.tmp',
                        dir=self.blobhelper.temporaryDirectory())
                    tmp_blobs_to_rm.append(name)
                    logger.log(
                        TRACE,
                        "Copying %s to temporary blob file %s for upload",
                        blobfile, name)

                    with os.fdopen(fd, 'wb') as target:
                        # If we don't get the length, ``copy_blob`` will.
                        old_pos = blobfile.tell()
                        blobfile.seek(0, 2)
                        length = blobfile.tell()
                        blobfile.seek(old_pos)

                        copy_blob(blobfile, target, length)
                        txn_data_size += length
                finally:
                    # Release the source blob even when the copy fails.
                    blobfile.close()
                self.restore.restoreBlob(record.oid, record.tid, record.data,
                                         name, record.data_txn, trans)

            tpc.tpc_vote(trans)
            tpc.tpc_finish(trans)
        finally:
            # Always clean up, so a failed transaction does not leave
            # temporary blob files behind.
            for tmp_blob in tmp_blobs_to_rm:
                logger.log(TRACE, "Removing temporary blob file %s", tmp_blob)
                try:
                    os.unlink(tmp_blob)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass

        return num_txn_records, txn_data_size, len(tmp_blobs_to_rm)
Ejemplo n.º 3
0
    def restore_one(self, active_txn_meta,
                    oid, tid, data):
        """
        Restore one record (*oid*, *tid*, *data*) into the transaction
        identified by *active_txn_meta*.

        When *data* is a blob record and its committed blob file can be
        opened on ``self.storage``, the blob is copied to a temporary file
        (whose name is appended to ``self.temp_blobs_to_rm``) and the record
        is restored with ``restoreBlob``. Otherwise the record is restored
        with ``restore``. A ``POSKeyError`` while opening the blob is logged
        and the record is restored without a blob.

        Returns a tuple ``(txn_data_size, had_blob)``: the length of *data*
        plus the length of any copied blob, and whether a blob was copied.
        """
        # The signature for both ``restore`` and ``restoreBlob``
        # is:
        #
        #   (oid, serial, data, (blobfilename|prev_txn), version, txn)
        #
        # Where ``txn`` is the TransactionMetaData object
        # originally passed to ``tpc_begin``. It is only used to
        # check that the same object has been passed.
        #
        # ``prev_txn`` is not used but would come from ``record.data_txn``

        txn_data_size = len(data) if data else 0

        blobfile = None
        if is_blob_record(data):
            try:
                blobfile = self.storage.openCommittedBlobFile(
                    oid, tid)
            except POSKeyError: # pragma: no cover
                logger.exception("Failed to open blob to copy")

        if blobfile is None:
            # We may not be able to read the data after this.
            data = self.restore._crs_transform_record_data(data)
            self.restore.restore(oid, tid, data,
                                 '', None, active_txn_meta)
            return txn_data_size, False

        try:
            # We may not be able to read the data after this.
            data = self.restore._crs_transform_record_data(data)
            fd, name = tempfile.mkstemp(
                suffix='.tmp',
                dir=self.blobhelper.temporaryDirectory()
            )
            self.temp_blobs_to_rm.append(name)
            logger.log(
                TRACE,
                "Copying %s to temporary blob file %s for upload",
                blobfile, name)

            with os.fdopen(fd, 'wb') as target:
                # If we don't get the length, ``copy_blob`` will.
                old_pos = blobfile.tell()
                blobfile.seek(0, 2)
                length = blobfile.tell()
                blobfile.seek(old_pos)

                copy_blob(blobfile, target, length)
                txn_data_size += length
        finally:
            # Close the source blob even if the transform or copy raised.
            blobfile.close()

        self.restore.restoreBlob(oid, tid, data,
                                 name, None, active_txn_meta)
        return txn_data_size, True
Ejemplo n.º 4
0
    def compare_truncated(self, src, dest):
        """Confirm that dest is a truncated copy of src.

        The copy process should have dropped all old revisions of objects
        in src.  Also note that the dest does not retain transaction
        metadata.

        For every record in dest, its ``(tid, data, blob)`` must equal the
        last such tuple seen for the same oid in src, and every oid seen
        in src must appear in dest.
        """

        def read_blob(storage, rec):
            # Return the blob bytes for *rec* in *storage*, or None when the
            # record is not a blob record or the blob cannot be loaded.
            if not is_blob_record(rec.data):
                return None
            try:
                fn = storage.loadBlob(rec.oid, rec.tid)
            except ZODB.POSException.POSKeyError:
                return None
            # Use a context manager so the file handle is not leaked.
            with open(fn, 'rb') as f:
                return f.read()

        # Later revisions overwrite earlier ones, leaving the last one seen.
        src_objects = {}  # {oid: (tid, data, blob or None)}
        for txn in src.iterator():
            for rec in txn:
                src_objects[rec.oid] = (rec.tid, rec.data, read_blob(src, rec))

        unchecked = set(src_objects)
        for txn in dest.iterator():
            for rec in txn:
                dst_object = (rec.tid, rec.data, read_blob(dest, rec))
                src_object = src_objects[rec.oid]
                self.assertEqual(src_object, dst_object)
                # Raises KeyError if dest holds a second record for an oid.
                unchecked.remove(rec.oid)

        self.assertEqual(len(unchecked), 0)
Ejemplo n.º 5
0
    def compare_truncated(self, src, dest):
        """Confirm that dest is a truncated copy of src.

        The copy process should have dropped all old revisions of objects
        in src.  Also note that the dest does not retain transaction
        metadata.

        Each dest record is compared, as ``(tid, data, blob)``, against the
        last record seen for the same oid in src; afterwards no src oid may
        remain unmatched.
        """

        src_objects = {}  # {oid: (tid, data, blob or None)}
        for txn in src.iterator():
            for rec in txn:
                blob = None
                if is_blob_record(rec.data):
                    try:
                        fn = src.loadBlob(rec.oid, rec.tid)
                    except ZODB.POSException.POSKeyError:
                        # The blob is unavailable; compare it as None.
                        pass
                    else:
                        # Close the blob file promptly instead of leaking it.
                        with open(fn, 'rb') as f:
                            blob = f.read()
                src_objects[rec.oid] = (rec.tid, rec.data, blob)

        unchecked = set(src_objects)
        for txn in dest.iterator():
            for rec in txn:
                blob = None
                if is_blob_record(rec.data):
                    try:
                        fn = dest.loadBlob(rec.oid, rec.tid)
                    except ZODB.POSException.POSKeyError:
                        pass
                    else:
                        with open(fn, 'rb') as f:
                            blob = f.read()
                dst_object = (rec.tid, rec.data, blob)
                src_object = src_objects[rec.oid]
                self.assertEqual(src_object, dst_object)
                unchecked.remove(rec.oid)

        self.assertEqual(len(unchecked), 0)
def copyTransactionsFrom(self, other):
    """Copy every transaction from the storage *other* into this storage.

    Each transaction from ``other.iterator()`` is begun, voted and finished
    on ``self``. Records are restored with ``restore``. For a blob record
    whose blob can be loaded from *other*, the blob is first copied to a
    temporary file in ``self.fshelper.temp_dir`` and the record is restored
    with ``restoreBlob``. If loading the blob raises ``POSKeyError`` the
    record is restored without a blob.
    """
    for trans in other.iterator():
        self.tpc_begin(trans, trans.tid, trans.status)
        for record in trans:
            blobfilename = None
            if is_blob_record(record.data):
                try:
                    blobfilename = other.loadBlob(record.oid, record.tid)
                except POSKeyError:
                    # The blob is unavailable; fall through and restore
                    # the record without it.
                    pass
            if blobfilename is not None:
                fd, name = tempfile.mkstemp(
                    suffix='.tmp', dir=self.fshelper.temp_dir)
                os.close(fd)
                # ``0o640`` is valid on Python 2.6+ and 3; the bare
                # ``0640`` form is a syntax error on Python 3.
                os.chmod(name, 0o640)
                # Close both files deterministically once the copy is done.
                with open(blobfilename, 'rb') as source:
                    with open(name, 'wb') as target:
                        utils.cp(source, target)
                self.restoreBlob(record.oid, record.tid, record.data,
                                 name, record.data_txn, trans)
            else:
                self.restore(record.oid, record.tid, record.data,
                             '', record.data_txn, trans)

        self.tpc_vote(trans)
        self.tpc_finish(trans)
Ejemplo n.º 7
0
    def copyTransactionsFrom(self, other):
        """
        Copy every transaction from the storage *other* into this storage.

        *other* is iterated twice: once to count the transactions (so that
        progress can be reported as a percentage) and once to copy them.
        Each transaction is begun, voted and finished on ``self.tpc`` and
        its records are restored through ``self.restore``. Blob records
        whose committed blob file can be opened on *other* are copied to a
        temporary file first and restored with ``restoreBlob``.

        After every transaction a progress line with the transfer rate,
        based on the record data sizes, is logged.
        """
        # pylint:disable=too-many-locals
        # adapted from ZODB.blob.BlobStorageMixin
        begin_time = time.time()
        txnum = 0
        total_size = 0
        blobhelper = self.blobhelper
        tpc = self.tpc
        restore = self.restore

        logger.info("Counting the transactions to copy.")
        num_txns = 0
        for _ in other.iterator():
            num_txns += 1
        logger.info("Copying %d transactions", num_txns)

        for trans in other.iterator():
            txnum += 1
            num_txn_records = 0

            tpc.tpc_begin(trans, trans.tid, trans.status)
            for record in trans:
                blobfile = None
                if is_blob_record(record.data):
                    try:
                        blobfile = other.openCommittedBlobFile(
                            record.oid, record.tid)
                    except POSKeyError:
                        # The blob is unavailable; fall through and
                        # restore the record without it.
                        pass
                if blobfile is not None:
                    try:
                        fd, name = tempfile.mkstemp(
                            suffix='.tmp', dir=blobhelper.temporaryDirectory())
                        # Write through the descriptor we already own rather
                        # than closing it and reopening the file by name.
                        with os.fdopen(fd, 'wb') as target:
                            copy_blob(blobfile, target)
                    finally:
                        # Release the source blob even when the copy fails.
                        blobfile.close()
                    restore.restoreBlob(record.oid, record.tid, record.data,
                                        name, record.data_txn, trans)
                else:
                    restore.restore(record.oid, record.tid, record.data, '',
                                    record.data_txn, trans)
                num_txn_records += 1
                if record.data:
                    total_size += len(record.data)
            tpc.tpc_vote(trans)
            tpc.tpc_finish(trans)

            pct_complete = '%1.2f%%' % (txnum * 100.0 / num_txns)
            elapsed = time.time() - begin_time
            if elapsed:
                rate = total_size / 1e6 / elapsed
            else:
                # Avoid dividing by zero when no time has measurably passed.
                rate = 0.0
            rate_str = '%1.3f' % rate
            logger.info("Copied tid %d,%5d records | %6s MB/s (%6d/%6d,%7s)",
                        bytes8_to_int64(trans.tid), num_txn_records, rate_str,
                        txnum, num_txns, pct_complete)

        elapsed = time.time() - begin_time
        logger.info(
            "All %d transactions copied successfully in %4.1f minutes.", txnum,
            elapsed / 60.0)
Ejemplo n.º 8
0
    def copyTransactionsFrom(self, other):
        """
        Copy every transaction from the storage *other* into this storage.

        The number of transactions is taken from ``len()`` of the iterator
        when that is supported and non-zero; otherwise the iterator is
        consumed once to count them and a fresh iterator is opened for the
        copy. Each transaction is begun, voted and finished on ``self.tpc``
        and its records are restored through ``self.restore``. Blob records
        whose committed blob file can be opened on *other* are copied to a
        temporary file and restored with ``restoreBlob``; the temporary
        files are removed once the transaction is done.

        Progress is logged when the transaction number is a multiple of
        ``self.log_count`` and at least ``self.log_interval`` seconds have
        passed since the previous progress line.
        """
        # pylint:disable=too-many-locals,too-many-statements,too-many-branches
        # adapted from ZODB.blob.BlobStorageMixin
        begin_time = time.time()
        log_at = begin_time + self.log_interval
        txnum = 0
        total_size = 0
        blobhelper = self.blobhelper
        tpc = self.tpc
        restore = self.restore

        logger.info("Counting the transactions to copy.")
        other_it = other.iterator()
        logger.debug("Opened the other iterator: %s", other_it)
        try:
            num_txns = len(other_it)
            if num_txns == 0:
                # Hmm, that shouldn't really be right, should it?
                # Try the other path.
                raise TypeError()
        except TypeError:
            logger.debug("Iterator %s doesn't support len()", other_it)
            num_txns = 0
            for _ in other_it:
                num_txns += 1
            # Counting consumed the iterator, so open a fresh one to copy.
            other_it.close()
            other_it = other.iterator()
        logger.info("Copying %d transactions", num_txns)

        tmp_blobs_to_rm = []
        for trans in other_it:
            txnum += 1
            num_txn_records = 0

            try:
                tpc.tpc_begin(trans, trans.tid, trans.status)
                for record in trans:
                    blobfile = None
                    if is_blob_record(record.data):
                        try:
                            blobfile = other.openCommittedBlobFile(
                                record.oid, record.tid)
                        except POSKeyError:
                            # The blob is unavailable; fall through and
                            # restore the record without it.
                            pass
                    if blobfile is not None:
                        try:
                            fd, name = tempfile.mkstemp(
                                suffix='.tmp',
                                dir=blobhelper.temporaryDirectory())
                            tmp_blobs_to_rm.append(name)
                            logger.debug(
                                "Copying %s to temporary blob file %s for upload",
                                blobfile, name)

                            with os.fdopen(fd, 'wb') as target:
                                copy_blob(blobfile, target)
                        finally:
                            # Release the source blob even when the copy
                            # fails.
                            blobfile.close()
                        restore.restoreBlob(record.oid, record.tid,
                                            record.data, name,
                                            record.data_txn, trans)
                    else:
                        restore.restore(record.oid, record.tid, record.data,
                                        '', record.data_txn, trans)
                    num_txn_records += 1
                    if record.data:
                        total_size += len(record.data)
                tpc.tpc_vote(trans)
                tpc.tpc_finish(trans)
            finally:
                # Remove this transaction's temporary blobs whether or not
                # it succeeded, so a failure does not leave files behind.
                for tmp_blob in tmp_blobs_to_rm:
                    logger.debug("Removing temporary blob file %s", tmp_blob)
                    try:
                        os.unlink(tmp_blob)
                    except OSError:
                        # Best effort: the file may already be gone.
                        pass
                del tmp_blobs_to_rm[:]

            if txnum % self.log_count == 0 and time.time() > log_at:
                now = time.time()
                log_at = now + self.log_interval

                pct_complete = '%1.2f%%' % (txnum * 100.0 / num_txns)
                elapsed = now - begin_time
                if elapsed:
                    rate = total_size / 1e6 / elapsed
                else:
                    rate = 0.0
                rate_str = '%1.3f' % rate

                logger.info(
                    "Copied tid %d,%5d records | %6s MB/s (%6d/%6d,%7s)",
                    bytes8_to_int64(trans.tid), num_txn_records, rate_str,
                    txnum, num_txns, pct_complete)

        elapsed = time.time() - begin_time
        logger.info(
            "All %d transactions copied successfully in %4.1f minutes.", txnum,
            elapsed / 60.0)
Ejemplo n.º 9
0
    def compare_exact(self, storage1, storage2):
        """Confirm that storage1 and storage2 contain equivalent data.

        The transactions of both storages are walked in lockstep. For each
        pair the tid, status, user, description and extension must match,
        and so must the oid, tid, data and (for blob records) blob contents
        of their records. The check fails if either storage has more
        transactions than the other. Both iterators are closed on exit,
        including when an assertion fails.
        """
        eq = self.assertEqual
        missing = object()
        exhausted = object()

        def next_txn(it):
            # Return the next transaction from *it*, or ``exhausted``.
            try:
                return next(it)
            except (IndexError, StopIteration):
                return exhausted

        iter1 = storage1.iterator()
        iter2 = storage2.iterator()
        try:
            while True:
                # Advance both iterators explicitly. ``zip`` pulls from
                # iter1 first and silently drops that transaction when
                # iter2 is already exhausted, which hides exactly one
                # extra transaction in storage1.
                txn1 = next_txn(iter1)
                txn2 = next_txn(iter2)
                if txn1 is exhausted or txn2 is exhausted:
                    break

                eq(txn1.tid, txn2.tid)
                eq(txn1.status, txn2.status)
                eq(txn1.user, txn2.user)
                eq(txn1.description, txn2.description)

                # b/w compat on the 'extension' attribute
                e1 = getattr(txn1, 'extension', missing)
                if e1 is missing:
                    # old attribute name
                    e1 = txn1._extension
                e2 = getattr(txn2, 'extension', missing)
                if e2 is missing:
                    # old attribute name
                    e2 = txn2._extension
                eq(e1, e2)

                # compare the objects in the transaction, but disregard
                # the order of the objects and any duplicated records
                # since those are not important.
                recs1 = {r.oid: r for r in txn1}
                recs2 = {r.oid: r for r in txn2}
                eq(len(recs1), len(recs2))
                # oids are unique dict keys, so sorting the items never
                # has to compare the record objects themselves.
                pairs = zip(sorted(recs1.items()), sorted(recs2.items()))
                for (_oid1, rec1), (_oid2, rec2) in pairs:
                    eq(rec1.oid, rec2.oid)
                    eq(rec1.tid, rec2.tid)
                    eq(rec1.data, rec2.data)
                    if is_blob_record(rec1.data):
                        try:
                            fn1 = storage1.loadBlob(rec1.oid, rec1.tid)
                        except ZODB.POSException.POSKeyError:
                            # Missing in storage1 means it must be missing
                            # in storage2 as well.
                            self.assertRaises(ZODB.POSException.POSKeyError,
                                              storage2.loadBlob, rec1.oid,
                                              rec1.tid)
                        else:
                            fn2 = storage2.loadBlob(rec1.oid, rec1.tid)
                            self.assertNotEqual(fn1, fn2)
                            with open(fn1, 'rb') as f1, open(fn2, 'rb') as f2:
                                eq(f1.read(), f2.read())

            # Make sure there are no more transactions left in either
            # storage, meaning they were the same length.
            if txn1 is not exhausted:
                self.fail("storage1 has more records")
            if txn2 is not exhausted:
                self.fail("storage2 has more records")
        finally:
            iter1.close()
            iter2.close()
Ejemplo n.º 10
0
    def compare_exact(self, storage1, storage2):
        """Confirm that storage1 and storage2 contain equivalent data.

        The transactions of both storages are walked in lockstep. For each
        pair the tid, status, user, description and extension must match,
        and so must the oid, tid, data and (for blob records) blob contents
        of their records. The check fails if either storage has more
        transactions than the other. Both iterators are closed on exit,
        including when an assertion fails.
        """
        eq = self.assertEqual
        missing = object()
        exhausted = object()

        def next_txn(it):
            # Return the next transaction from *it*, or ``exhausted``.
            try:
                return next(it)
            except (IndexError, StopIteration):
                return exhausted

        iter1 = storage1.iterator()
        iter2 = storage2.iterator()
        try:
            while True:
                # Advance both iterators explicitly. ``zip`` pulls from
                # iter1 first and silently drops that transaction when
                # iter2 is already exhausted, which hides exactly one
                # extra transaction in storage1.
                txn1 = next_txn(iter1)
                txn2 = next_txn(iter2)
                if txn1 is exhausted or txn2 is exhausted:
                    break

                eq(txn1.tid, txn2.tid)
                eq(txn1.status, txn2.status)
                eq(txn1.user, txn2.user)
                eq(txn1.description, txn2.description)

                # b/w compat on the 'extension' attribute
                e1 = getattr(txn1, 'extension', missing)
                if e1 is missing:
                    # old attribute name
                    e1 = txn1._extension
                e2 = getattr(txn2, 'extension', missing)
                if e2 is missing:
                    # old attribute name
                    e2 = txn2._extension
                eq(e1, e2)

                # compare the objects in the transaction, but disregard
                # the order of the objects and any duplicated records
                # since those are not important.
                recs1 = {r.oid: r for r in txn1}
                # Must be built from txn2: building it from txn1 again
                # would compare storage1 against itself.
                recs2 = {r.oid: r for r in txn2}
                eq(len(recs1), len(recs2))
                # oids are unique dict keys, so sorting the items never
                # has to compare the record objects themselves.
                pairs = zip(sorted(recs1.items()), sorted(recs2.items()))
                for (_oid1, rec1), (_oid2, rec2) in pairs:
                    eq(rec1.oid, rec2.oid)
                    eq(rec1.tid, rec2.tid)
                    eq(rec1.data, rec2.data)
                    if is_blob_record(rec1.data):
                        try:
                            fn1 = storage1.loadBlob(rec1.oid, rec1.tid)
                        except ZODB.POSException.POSKeyError:
                            # Missing in storage1 means it must be missing
                            # in storage2 as well.
                            self.assertRaises(
                                ZODB.POSException.POSKeyError,
                                storage2.loadBlob, rec1.oid, rec1.tid)
                        else:
                            fn2 = storage2.loadBlob(rec1.oid, rec1.tid)
                            self.assertNotEqual(fn1, fn2)
                            with open(fn1, 'rb') as f1, open(fn2, 'rb') as f2:
                                eq(f1.read(), f2.read())

            # Make sure there are no more transactions left in either
            # storage, meaning they were the same length.
            if txn1 is not exhausted:
                self.fail("storage1 has more records")
            if txn2 is not exhausted:
                self.fail("storage2 has more records")
        finally:
            iter1.close()
            iter2.close()