Example #1
    def __poll_old_oids_and_remove(self, adapter, local_client):
        from relstorage.adapters.connmanager import connection_callback

        oids = OidSet(local_client.keys())
        # In local tests, this function executes against PostgreSQL 11 in .78s
        # for 133,002 older OIDs; or, .35s for 57,002 OIDs against MySQL 5.7.
        # In one production environment of 800,000 OIDs with a 98% survival rate,
        # using MySQL 5.7 takes an average of about 11s.
        logger.debug("Polling %d oids stored in cache", len(oids))

        @connection_callback(
            isolation_level=adapter.connmanager.isolation_load, read_only=True)
        def poll_old_oids(_conn, cursor):
            return adapter.mover.current_object_tids(cursor, oids)

        current_tids_for_oids = adapter.connmanager.open_and_call(poll_old_oids).get
        polled_invalid_oids = OidSet()
        peek = local_client._peek

        for oid in oids:
            current_tid = current_tids_for_oids(oid)
            if (current_tid is None
                    or peek(oid)[1] != current_tid):
                polled_invalid_oids.add(oid)

        logger.debug("Polled %d older oids stored in cache; %d survived",
                     len(oids), len(oids) - len(polled_invalid_oids))
        local_client.remove_invalid_persistent_oids(polled_invalid_oids)
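
Example #2 below achieves the same configuration by setting attributes on the callback function by hand, which suggests that ``connection_callback`` is essentially an attribute-setting decorator. A minimal sketch under that assumption (the attribute names are taken from Example #2; the real relstorage decorator may accept more options):

def connection_callback(isolation_level=None, read_only=False):
    # Hypothetical sketch: record the desired connection options as
    # function attributes for open_and_call() to inspect later.
    def decorator(func):
        func.transaction_isolation_level = isolation_level
        func.transaction_read_only = read_only
        return func
    return decorator
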
Example #2
    def __poll_old_oids_and_remove(self, adapter, local_client):
        oids = OidSet(local_client.keys())
        # In local tests, this function executes against PostgreSQL 11 in .78s
        # for 133,002 older OIDs; or, .35s for 57,002 OIDs against MySQL 5.7.
        logger.debug("Polling %d oids stored in cache", len(oids))

        def poll_old_oids(_conn, cursor):
            return adapter.mover.current_object_tids(cursor, oids)

        poll_old_oids.transaction_isolation_level = adapter.connmanager.isolation_load
        poll_old_oids.transaction_read_only = True
        current_tids_for_oids = adapter.connmanager.open_and_call(
            poll_old_oids).get
        polled_invalid_oids = OidSet()
        peek = local_client._peek

        for oid in oids:
            current_tid = current_tids_for_oids(oid)
            if (current_tid is None or peek(oid)[1] != current_tid):
                polled_invalid_oids.add(oid)

        logger.debug("Polled %d older oids stored in cache; %d survived",
                     len(oids),
                     len(oids) - len(polled_invalid_oids))
        local_client.remove_invalid_persistent_oids(polled_invalid_oids)
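
For orientation, a guess at what ``open_and_call`` does with those attributes: open a connection configured accordingly, invoke the callback with the connection and a cursor, and return the result. The ``open``/``rollback_and_close`` signatures here are assumptions, not the verified relstorage API:

def open_and_call_sketch(connmanager, callback):
    # Hypothetical: the attribute names mirror the example above.
    conn, cursor = connmanager.open(
        isolation=getattr(callback, 'transaction_isolation_level', None),
        read_only=getattr(callback, 'transaction_read_only', False))
    try:
        return callback(conn, cursor)
    finally:
        connmanager.rollback_and_close(conn, cursor)
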
Example #3
def _find_missing_references_from_pickles(destination, pickles, permanently_gone):
    """
    Return a set of OID integers for objects that the given pickle
    states reference but that are missing from *destination*.

    *permanently_gone* is a set of OID integers already known to be
    missing; they are not investigated or returned again.
    """
    oids = []
    for pickle in pickles:
        # Support zc.zlibstorage wrappers.
        if pickle.startswith(b'.z'):
            pickle = zlib.decompress(pickle[2:])
        get_oids_referenced_by_pickle(pickle, oids)

    # Deduplicate: distinct pickles routinely reference the same objects,
    # and the log line below counts unique references.
    oids = set(oids)
    logger.info(
        "Given %d pickles, there are %d unique references.",
        len(pickles), len(oids)
    )

    missing_oids = OidSet()
    destination.prefetch(oids)
    for oid in oids:
        # *permanently_gone* holds OID integers, so convert the 8-byte
        # OID before the membership test (comparing the raw bytes
        # against an integer set could never match).
        oid_int = bytes8_to_int64(oid)
        if oid_int in permanently_gone:
            continue
        try:
            state, tid = destination.load(oid, b'')
            if is_blob_record(state):
                destination.loadBlob(oid, tid)
        except POSKeyError:
            missing_oids.add(oid_int)
    logger.info(
        "Given %d pickles, there are %d missing references.",
        len(pickles), len(missing_oids)
    )
    return missing_oids
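
``get_oids_referenced_by_pickle`` is not shown in these examples, but its call signature matches ZODB's own ``referencesf``, which appends each 8-byte OID referenced by a pickle to the given list. A stand-in for experimentation, assuming that equivalence:

from ZODB.serialize import referencesf

def get_oids_referenced_by_pickle(pickle, oids):
    # referencesf() appends the referenced 8-byte OIDs to *oids*.
    referencesf(pickle, oids)
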
Example #4
def read_missing_oids(oid_lines):
    """
    Parse lines into oids.

    >>> list(read_missing_oids([
    ...   "!!! Users 0 ?", "POSKeyError: foo",
    ...   "!!! Users 0 ?",
    ...   "!!! Users 1 ?",
    ...   "bad xref name, 1", "bad db",  ]))
    [0, 1]
    """
    result = OidSet()
    for line in oid_lines:
        if line.startswith('bad') or ':' in line:
            continue
        if line.startswith('!!!'):
            # zc.zodbdgc output. bad OID is always the
            # third field.
            try:
                oid = int(line.split()[2])
            except (ValueError, IndexError):
                logger.info("Malformed zc.zodbdgc input: %s", line)
                continue
            result.add(oid)
        else:
            # Just an int
            try:
                oid = int(line)
            except ValueError:
                logger.info("Malformed input: %s", line)
            else:
                result.add(oid)
    close = getattr(oid_lines, 'close', None)
    if close is not None and oid_lines is not sys.stdin:
        # Plain lists of lines (as in the doctest) have no close().
        close()
    return result
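
The ``OidSet`` type used throughout these examples is not defined here; it holds 64-bit OID integers. A plausible stand-in for running the snippets is a BTrees integer set, with the ``OidSet_difference`` and ``OidSet_discard`` helpers from Example #7 reconstructed under that assumption:

from BTrees.LLBTree import LLTreeSet as OidSet
from BTrees.LLBTree import difference as OidSet_difference

def OidSet_discard(s, val):
    # TreeSets have no discard(); remove the value only if present.
    try:
        s.remove(val)
    except KeyError:
        pass
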
Example #5
    def __poll_old_oids_and_remove(self, adapter, local_client, timeout):
        from relstorage.adapters.connmanager import connection_callback
        from relstorage.adapters.interfaces import AggregateOperationTimeoutError

        cached_oids = OidSet(local_client.keys())
        # In local tests, this function executes against PostgreSQL 11 in .78s
        # for 133,002 older OIDs; or, .35s for 57,002 OIDs against MySQL 5.7.
        # In one production environment of 800,000 OIDs with a 98% survival rate,
        # using MySQL 5.7 takes an average of about 11s.
        # However, it has been observed that in some cases, presumably when the database
        # is under intense IO stress, this can take 400s for 500,000 OIDs:
        # since the ``current_object_tids`` batches in groups of 1024, that works out to
        # .75s per SQL query. Not good. Hence the ability to set a timeout.
        logger.info("Polling %d oids stored in cache with SQL timeout %r",
                    len(cached_oids), timeout)

        @connection_callback(
            isolation_level=adapter.connmanager.isolation_load, read_only=True)
        def poll_cached_oids(_conn, cursor):
            # type: (Any, Any) -> Dict[int, int]
            """Return mapping of {oid_int: tid_int}"""
            try:
                return adapter.mover.current_object_tids(cursor,
                                                         cached_oids,
                                                         timeout=timeout)
            except AggregateOperationTimeoutError as ex:
                # If we time out, we can at least validate the results we have
                # so far.
                logger.info(
                    "Timed out polling the database for %s oids; will use %s partial results",
                    len(cached_oids), len(ex.partial_result))
                return ex.partial_result

        current_tids = adapter.connmanager.open_and_call(poll_cached_oids)
        current_tid = current_tids.get
        polled_invalid_oids = OidSet()
        cache_is_correct = local_client._cache.contains_oid_with_tid

        for oid_int in cached_oids:
            if not cache_is_correct(oid_int, current_tid(oid_int)):
                polled_invalid_oids.add(oid_int)

        logger.info(
            "Polled %d older oids stored in cache (%d found in database); %d survived",
            len(cached_oids), len(current_tids),
            len(cached_oids) - len(polled_invalid_oids))
        local_client.remove_invalid_persistent_oids(polled_invalid_oids)
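
The comments above pin down the shape of ``current_object_tids`` when given a timeout: it issues batched ``IN`` queries (1024 OIDs at a time) and checks a deadline between batches, raising an error that carries whatever it gathered so far. A sketch of that pattern; the exception class stands in for the relstorage one, and the SQL assumes a history-free ``object_state`` table:

import time

class AggregateOperationTimeoutError(Exception):
    # Stand-in: the real relstorage exception also exposes the
    # partial result accumulated before the deadline passed.
    def __init__(self, partial_result):
        Exception.__init__(self, 'timed out')
        self.partial_result = partial_result

def current_object_tids_sketch(cursor, oid_ints, batch_size=1024, timeout=None):
    deadline = (time.time() + timeout) if timeout is not None else None
    result = {}
    oid_ints = list(oid_ints)
    for i in range(0, len(oid_ints), batch_size):
        # Enforce the aggregate deadline between batches, not per row.
        if deadline is not None and time.time() >= deadline:
            raise AggregateOperationTimeoutError(result)
        batch = oid_ints[i:i + batch_size]
        cursor.execute(
            'SELECT zoid, tid FROM object_state WHERE zoid IN (%s)'
            % ', '.join(['%s'] * len(batch)),
            batch)
        result.update(cursor.fetchall())
    return result
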
Example #6
def main(argv=None):
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(name)s] %(levelname)s %(message)s")

    argv = argv if argv is not None else sys.argv

    # 1. Open the source and destination.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--dry-run", dest="dry_run", action="store_true",
        default=False,
        help="Attempt to open both storages, then explain what would be done."
    )
    parser.add_argument(
        '--oid-list', dest='oid_list', type=argparse.FileType('r'),
        default='-',
        help="Where to read the list of OIDs from. "
             "Defaults to stdin for use in a pipe from zc.zodbgc."
    )
    parser.add_argument("config_file", type=argparse.FileType('r'))

    options = parser.parse_args(argv[1:])
    permanently_gone = OidSet()

    source, destination = open_storages(options)
    check_db_compat(source)
    check_db_compat(destination)

    if options.dry_run:
        # Make sure we can't commit.
        destination.tpc_finish = destination.tpc_abort

    # 2. Read incoming OIDs.
    missing_oids = read_missing_oids(options.oid_list)
    if not missing_oids:
        sys.exit("Unable to read any missing OIDs.")

    if options.dry_run:
        # And since we can't commit, the starting set of objects
        # known to be permanently gone are...the set we start with!
        permanently_gone.update(missing_oids)

    try:
        while missing_oids:
            missing_oids = _restore_missing_oids(source, destination,
                                                 missing_oids, permanently_gone)
    finally:
        source.close()
        destination.close()

    if permanently_gone:
        logger.warning(
            "The following referenced OIDs could not be recovered: %s",
            list(permanently_gone)
        )
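
Putting Examples #4 and #6 together, the script expects an OID list on stdin (for example, piped from zc.zodbdgc's reference checker) or via ``--oid-list``. A hypothetical driver, assuming the module above is importable as ``restore_missing``; the file names are placeholders:

import sys
from restore_missing import main  # hypothetical module name

if __name__ == '__main__':
    sys.exit(main(['restore_missing', '--dry-run',
                   '--oid-list', 'missing-oids.txt',
                   'zodb-config.xml']))
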
Example #7
def _restore_missing_oids(source, destination, missing_oids, permanently_gone):
    """
    Implement steps 3 through 5.

    Return a set of OID integers that are still missing from the
    destination, considering only the objects restored in this pass.
    """

    # 3. Get backup data
    # (oid, state, tid)
    backup_data = source._adapter.mover.load_currents(
        source._load_connection.cursor,
        missing_oids
    )

    # {oid: (state, tid)}
    backup_state_by_oid = OidMap()
    tids_to_oids = OidMap()
    for oid, state, tid in backup_data:
        if tid not in tids_to_oids:
            tids_to_oids[tid] = OidSet()
        tids_to_oids[tid].add(oid)
        backup_state_by_oid[oid] = (state, tid)

    found_oids = OidSet(backup_state_by_oid)
    oids_required_but_not_in_backup = OidSet_difference(missing_oids, found_oids)
    if oids_required_but_not_in_backup:
        logger.warning(
            "The backup storage is missing %d OIDs needed",
            len(oids_required_but_not_in_backup)
        )
        permanently_gone.update(oids_required_but_not_in_backup)
    del found_oids
    del oids_required_but_not_in_backup

    # 3b. Compare with destination.
    current_data = destination._adapter.mover.load_currents(
        destination._load_connection.cursor,
        backup_state_by_oid
    )
    current_data = list(current_data)
    for oid, _, tid in current_data:
        if oid not in backup_state_by_oid:
            # Common, expected case.
            continue

        _, backup_tid = backup_state_by_oid[oid]

        logger.warning(
            "Destination already contains data for %d. Check your OID list. "
            "Refusing to overwrite. (Source TID: %d; Destination TID: %d)",
            oid, backup_tid, tid
        )
        # If we're doing a dry-run, it's probably in here.
        OidSet_discard(permanently_gone, oid)
        del backup_state_by_oid[oid]
        tids_to_oids[backup_tid].remove(oid)
        if not tids_to_oids[backup_tid]:
            del tids_to_oids[backup_tid]

    if not tids_to_oids:
        logger.warning("All OIDs already present in destination; nothing to do.")
        return OidSet()

    # 4. Produce phony storage that iterates the backup data.
    logger.info(
        "Beginning restore of %d OIDs in %d transactions.",
        len(backup_state_by_oid),
        len(tids_to_oids)
    )
    copy_from = MissingObjectStorage(backup_state_by_oid, tids_to_oids, source)

    # 5. Hand it over to be copied.
    destination.copyTransactionsFrom(copy_from)

    # Find anything still missing, after having stored what we could.
    newly_missing = _find_missing_references_from_pickles(
        destination,
        [x[0] for x in backup_state_by_oid.values()],
        permanently_gone)
    return newly_missing
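
``MissingObjectStorage`` is not shown. Step 4's comment describes it: a phony storage exposing just enough of the ZODB iterator protocol for ``copyTransactionsFrom`` to replay one synthetic transaction per TID. A rough sketch built on ZODB's base record classes (transaction metadata and blob handling are glossed over):

from ZODB.BaseStorage import DataRecord, TransactionRecord
from ZODB.utils import p64

class _SyntheticTransaction(TransactionRecord):
    def __init__(self, tid, records):
        TransactionRecord.__init__(self, tid, ' ', b'', b'', {})
        self._records = records

    def __iter__(self):
        return iter(self._records)

class MissingObjectStorage(object):
    # Hypothetical sketch: group the backup states by TID and present
    # them as an iterable of transactions, oldest TID first.
    def __init__(self, state_by_oid, tids_to_oids, source):
        self._state_by_oid = state_by_oid
        self._tids_to_oids = tids_to_oids
        self._source = source  # kept for blob access, not sketched here

    def iterator(self):
        for tid_int in sorted(self._tids_to_oids):
            tid = p64(tid_int)
            records = [
                DataRecord(p64(oid_int), tid,
                           self._state_by_oid[oid_int][0], None)
                for oid_int in self._tids_to_oids[tid_int]
            ]
            yield _SyntheticTransaction(tid, records)
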