Exemple #1
0
 def set_idnum_value(
     self, req: "CamcopsRequest", which_idnum: int, idnum_value: int
 ) -> None:
     """
     Sets an ID number value.
     """
     dbsession = req.dbsession
     ccsession = req.camcops_session
     idnums = self.idnums  # type: List[PatientIdNum]
     for idobj in idnums:
         if idobj.which_idnum == which_idnum:
             idobj.idnum_value = idnum_value
             return
     # Otherwise, make a new one:
     newid = PatientIdNum()
     newid.patient_id = self.id
     newid._device_id = self._device_id
     newid._era = self._era
     newid._current = True
     newid._when_added_exact = req.now_era_format
     newid._when_added_batch_utc = req.now_utc
     newid._adding_user_id = ccsession.user_id
     newid._camcops_version = CAMCOPS_SERVER_VERSION_STRING
     dbsession.add(newid)
     self.idnums.append(newid)
Exemple #2
0
def translate_fn(trcon: TranslationContext) -> None:
    """
    Function to translate source objects to their destination counterparts,
    where special processing is required. Called as a callback from
    :func:`cardinal_pythonlib.sqlalchemy.merge_db.merge_db`.

    Args:
        trcon: the :class:`TranslationContext`; all the relevant information is
            in here, and our function modifies its members.

    This function does the following things:

    - For any records uploaded from tablets: set ``_group_id``, if it's blank.

    - For :class:`camcops_server.cc_modules.cc_user.User` objects: if an
      identical user is found in the destination database, merge on it rather
      than creating a new one. Users with matching usernames are considered to
      be identical.

    - For :class:`Device` objects: if an identical device is found, merge on it
      rather than creating a new one. Devices with matching names are
      considered to be identical.

    - For :class:`camcops_server.cc_modules.cc_group.Group` objects: if an
      identical group is found, merge on it rather than creating a new one.
      Groups with matching names are considered to be identical.

    - For :class:`camcops_server.cc_modules.cc_patient.Patient` objects: if any
      have ID numbers in the old format (as columns in the Patient table),
      convert them to the :class:`PatientIdNum` system.

    - If we're inserting a :class:`PatientIdNum`, make sure there is a
      corresponding
      :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition`, and that
      it's valid.

    - If we're merging from a more modern database with the
      :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition` table,
      check our ID number definitions don't conflict.

    - Check we're not creating duplicates for anything uploaded.

    """
    log.debug("Translating object from table: {!r}", trcon.tablename)
    oldobj = trcon.oldobj
    newobj = trcon.newobj
    # log.debug("Translating: {}", auto_repr(oldobj))

    # -------------------------------------------------------------------------
    # Set _group_id correctly for tablet records
    # -------------------------------------------------------------------------
    if isinstance(oldobj, GenericTabletRecordMixin):
        if ("_group_id" in trcon.missing_src_columns
                or oldobj._group_id is None):
            # ... order that "if" statement carefully; if the _group_id column
            # is missing from the source, don't touch oldobj._group_id or
            # it'll trigger a DB query that fails.
            #
            # Set _group_id because it's blank
            #
            ensure_default_group_id(trcon)
            default_group_id = trcon.info["default_group_id"]  # type: int
            log.debug("Assiging new _group_id of {!r}", default_group_id)
            newobj._group_id = default_group_id
        else:
            #
            # Re-map _group_id
            #
            newobj._group_id = get_dest_groupnum(oldobj._group_id, trcon,
                                                 oldobj)

    # -------------------------------------------------------------------------
    # If an identical user is found, merge on it rather than creating a new
    # one. Users with matching usernames are considered to be identical.
    # -------------------------------------------------------------------------
    if trcon.tablename == User.__tablename__:
        src_user = cast(User, oldobj)
        src_username = src_user.username
        matching_user = (trcon.dst_session.query(User).filter(
            User.username == src_username).one_or_none()
                         )  # type: Optional[User]
        if matching_user is not None:
            log.debug(
                "Matching User (username {!r}) found; merging",
                matching_user.username,
            )
            trcon.newobj = matching_user  # so that related records will work

    # -------------------------------------------------------------------------
    # If an identical device is found, merge on it rather than creating a
    # new one. Devices with matching names are considered to be identical.
    # -------------------------------------------------------------------------
    if trcon.tablename == Device.__tablename__:
        src_device = cast(Device, oldobj)
        src_devicename = src_device.name
        matching_device = (trcon.dst_session.query(Device).filter(
            Device.name == src_devicename).one_or_none()
                           )  # type: Optional[Device]
        if matching_device is not None:
            log.debug(
                "Matching Device (name {!r}) found; merging",
                matching_device.name,
            )
            trcon.newobj = matching_device

        # BUT BEWARE, BECAUSE IF YOU MERGE THE SAME DATABASE TWICE (even if
        # that's a silly thing to do...), MERGING DEVICES WILL BREAK THE KEY
        # RELATIONSHIPS. For example,
        #   source:
        #       pk = 1, id = 1, device = 100, era = 'NOW', current = 1
        #   dest after first merge:
        #       pk = 1, id = 1, device = 100, era = 'NOW', current = 1
        #   dest after second merge:
        #       pk = 1, id = 1, device = 100, era = 'NOW', current = 1
        #       pk = 2, id = 1, device = 100, era = 'NOW', current = 1
        # ... so you get a clash/duplicate.
        # Mind you, that's fair, because there is a duplicate.
        # SO WE DO SEPARATE DUPLICATE CHECKING, below.

    # -------------------------------------------------------------------------
    # Don't copy Group records; the user must set these up manually and specify
    # groupnum_map, for safety
    # -------------------------------------------------------------------------
    if trcon.tablename == Group.__tablename__:
        trcon.newobj = None  # don't insert this object
        # ... don't set "newobj = None"; that wouldn't alter trcon
        # Now make sure the map is OK:
        src_group = cast(Group, oldobj)
        trcon.objmap[oldobj] = get_dst_group(
            dest_groupnum=get_dest_groupnum(src_group.id, trcon, src_group),
            dst_session=trcon.dst_session,
        )

    # -------------------------------------------------------------------------
    # If there are any patient numbers in the old format (as a set of
    # columns in the Patient table) which were not properly converted
    # to the new format (as individual records in the PatientIdNum
    # table), create new entries.
    # Only worth bothering with for _current entries.
    # (More explicitly: do not create new PatientIdNum entries for non-current
    # patients; it's very fiddly if there might be asynchrony between
    # Patient and PatientIdNum objects for that patient.)
    # -------------------------------------------------------------------------
    if trcon.tablename == Patient.__tablename__:
        # (a) Find old patient numbers
        old_patient = cast(Patient, oldobj)
        # noinspection PyUnresolvedReferences
        src_pt_query = (
            select([text("*")]).select_from(table(trcon.tablename)).where(
                column(Patient.id.name) == old_patient.id).where(
                    column(Patient._current.name) == True)  # noqa: E712
            .where(column(Patient._device_id.name) ==
                   old_patient._device_id).where(
                       column(Patient._era.name) == old_patient._era))
        rows = trcon.src_session.execute(src_pt_query)  # type: ResultProxy
        list_of_dicts = [dict(row.items()) for row in rows]
        assert (len(list_of_dicts) == 1
                ), "Failed to fetch old patient IDs correctly; bug?"
        old_patient_dict = list_of_dicts[0]

        # (b) If any don't exist in the new database, create them.
        # -- no, that's not right; we will be processing Patient before
        # PatientIdNum, so that should be: if any don't exist in the *source*
        # database, create them.
        src_tables = trcon.src_table_names
        for src_which_idnum in range(1, NUMBER_OF_IDNUMS_DEFUNCT + 1):
            old_fieldname = FP_ID_NUM + str(src_which_idnum)
            idnum_value = old_patient_dict[old_fieldname]
            if idnum_value is None:
                # Old Patient record didn't contain this ID number
                continue
            # Old Patient record *did* contain the ID number...
            if PatientIdNum.__tablename__ in src_tables:
                # noinspection PyUnresolvedReferences
                src_idnum_query = (select([func.count()]).select_from(
                    table(PatientIdNum.__tablename__)).where(
                        column(PatientIdNum.patient_id.name) ==
                        old_patient.id).where(
                            column(PatientIdNum._current.name) ==
                            old_patient._current).where(
                                column(PatientIdNum._device_id.name) ==
                                old_patient._device_id).where(
                                    column(PatientIdNum._era.name) ==
                                    old_patient._era).where(
                                        column(PatientIdNum.which_idnum.name)
                                        == src_which_idnum))
                n_present = trcon.src_session.execute(src_idnum_query).scalar()
                #                 ^^^
                #                  !
                if n_present != 0:
                    # There was already a PatientIdNum for this which_idnum
                    continue
            pidnum = PatientIdNum()
            # PatientIdNum fields:
            pidnum.id = fake_tablet_id_for_patientidnum(
                patient_id=old_patient.id, which_idnum=src_which_idnum)
            # ... guarantees a pseudo client (tablet) PK
            pidnum.patient_id = old_patient.id
            pidnum.which_idnum = get_dest_which_idnum(src_which_idnum, trcon,
                                                      oldobj)
            pidnum.idnum_value = idnum_value
            # GenericTabletRecordMixin fields:
            # _pk: autogenerated
            # noinspection PyUnresolvedReferences
            pidnum._device_id = trcon.objmap[old_patient._device].id
            pidnum._era = old_patient._era
            pidnum._current = old_patient._current
            pidnum._when_added_exact = old_patient._when_added_exact
            pidnum._when_added_batch_utc = old_patient._when_added_batch_utc
            # noinspection PyUnresolvedReferences
            pidnum._adding_user_id = (trcon.objmap[old_patient._adding_user].id
                                      if old_patient._adding_user is not None
                                      else None)
            pidnum._when_removed_exact = old_patient._when_removed_exact
            pidnum._when_removed_batch_utc = (
                old_patient._when_removed_batch_utc)
            # noinspection PyUnresolvedReferences
            pidnum._removing_user_id = (
                trcon.objmap[old_patient._removing_user].id
                if old_patient._removing_user is not None else None)
            # noinspection PyUnresolvedReferences
            pidnum._preserving_user_id = (
                trcon.objmap[old_patient._preserving_user].id
                if old_patient._preserving_user is not None else None)
            pidnum._forcibly_preserved = old_patient._forcibly_preserved
            pidnum._predecessor_pk = None  # Impossible to calculate properly
            pidnum._successor_pk = None  # Impossible to calculate properly
            pidnum._manually_erased = old_patient._manually_erased
            pidnum._manually_erased_at = old_patient._manually_erased_at
            # noinspection PyUnresolvedReferences
            pidnum._manually_erasing_user_id = (
                trcon.objmap[old_patient._manually_erasing_user].id
                if old_patient._manually_erasing_user is not None else None)
            pidnum._camcops_version = old_patient._camcops_version
            pidnum._addition_pending = old_patient._addition_pending
            pidnum._removal_pending = old_patient._removal_pending
            pidnum._group_id = newobj._group_id
            # ... will have been set above if it was blank

            # OK.
            log.debug("Inserting new PatientIdNum: {}", pidnum)
            trcon.dst_session.add(pidnum)

    # -------------------------------------------------------------------------
    # If we're inserting a PatientIdNum, make sure there is a corresponding
    # IdNumDefinition, and that it's valid
    # -------------------------------------------------------------------------
    if trcon.tablename == PatientIdNum.__tablename__:
        src_pidnum = cast(PatientIdNum, oldobj)
        src_which_idnum = src_pidnum.which_idnum
        # Is it present?
        if src_which_idnum is None:
            raise ValueError(f"Bad PatientIdNum: {src_pidnum!r}")
        # Ensure the new object has an appropriate ID number FK:
        dst_pidnum = cast(PatientIdNum, newobj)
        dst_pidnum.which_idnum = get_dest_which_idnum(src_which_idnum, trcon,
                                                      oldobj)

    # -------------------------------------------------------------------------
    # If we're merging from a more modern database with the IdNumDefinition
    # table, skip source IdNumDefinition records; the user must set these up
    # manually and specify whichidnum_map, for safety
    # -------------------------------------------------------------------------
    if trcon.tablename == IdNumDefinition.__tablename__:
        trcon.newobj = None  # don't insert this object
        # ... don't set "newobj = None"; that wouldn't alter trcon
        # Now make sure the map is OK:
        src_iddef = cast(IdNumDefinition, oldobj)
        trcon.objmap[oldobj] = get_dst_iddef(
            which_idnum=get_dest_which_idnum(src_iddef.which_idnum, trcon,
                                             src_iddef),
            dst_session=trcon.dst_session,
        )

    # -------------------------------------------------------------------------
    # Check we're not creating duplicates for anything uploaded
    # -------------------------------------------------------------------------
    if isinstance(oldobj, GenericTabletRecordMixin):
        # noinspection PyTypeChecker
        cls = newobj.__class__  # type: Type[GenericTabletRecordMixin]
        # Records uploaded from tablets must be unique on the combination of:
        #       id                  = table PK
        #       _device_id          = device
        #       _era                = device era
        #       _when_removed_exact = removal date or NULL
        # noinspection PyUnresolvedReferences
        exists_query = (select([
            func.count()
        ]).select_from(table(
            trcon.tablename)).where(column(cls.id.name) == oldobj.id).where(
                column(cls._device_id.name) == trcon.objmap[
                    oldobj._device].id).where(
                        column(cls._era.name) == oldobj._era).where(
                            column(cls._when_removed_exact.name) ==
                            oldobj._when_removed_exact))
        # Note re NULLs... Although it's an inconvenient truth in SQL that
        #   SELECT NULL = NULL; -- returns NULL
        # in this code we have a comparison of a column to a Python value.
        # SQLAlchemy is clever and renders "IS NULL" if the Python value is
        # None, or an "=" comparison otherwise.
        # If we were comparing a column to another column, we'd have to do
        # more; e.g.
        #
        # WRONG one-to-one join to self:
        #
        #   SELECT a._pk, b._pk, a._when_removed_exact
        #   FROM phq9 a
        #   INNER JOIN phq9 b
        #       ON a._pk = b._pk
        #       AND a._when_removed_exact = b._when_removed_exact;
        #
        #   -- drops all rows
        #
        # CORRECT one-to-one join to self:
        #
        #   SELECT a._pk, b._pk, a._when_removed_exact
        #   FROM phq9 a
        #   INNER JOIN phq9 b
        #       ON a._pk = b._pk
        #       AND (a._when_removed_exact = b._when_removed_exact
        #            OR (a._when_removed_exact IS NULL AND
        #                b._when_removed_exact IS NULL));
        #
        #   -- returns all rows
        n_exists = trcon.dst_session.execute(exists_query).scalar()
        if n_exists > 0:
            # noinspection PyUnresolvedReferences
            existing_rec_q = (select(["*"]).select_from(
                table(trcon.tablename)).where(
                    column(cls.id.name) == oldobj.id).where(
                        column(cls._device_id.name) == trcon.objmap[
                            oldobj._device].id).where(
                                column(cls._era.name) == oldobj._era).where(
                                    column(cls._when_removed_exact.name) ==
                                    oldobj._when_removed_exact))
            resultproxy = trcon.dst_session.execute(existing_rec_q).fetchall()
            existing_rec = [dict(row) for row in resultproxy]
            log.critical(
                "Source record, inheriting from GenericTabletRecordMixin and "
                "shown below, already exists in destination database... "
                "in table {t!r}, clashing on: "
                "id={i!r}, device_id={d!r}, era={e!r}, "
                "_when_removed_exact={w!r}.\n"
                "ARE YOU TRYING TO MERGE THE SAME DATABASE IN TWICE? "
                "DON'T.",
                t=trcon.tablename,
                i=oldobj.id,
                d=oldobj._device_id,
                e=oldobj._era,
                w=oldobj._when_removed_exact,
            )
            if trcon.tablename == PatientIdNum.__tablename__ and (
                    oldobj.id % NUMBER_OF_IDNUMS_DEFUNCT == 0):
                log.critical(
                    "Since this error has occurred for table {t!r} "
                    "(and for id % {n} == 0), "
                    "this error may reflect a previous bug in the patient ID "
                    "number fix for the database upload script, in which all "
                    "ID numbers for patients with patient.id = n were given "
                    "patient_idnum.id = n * {n} themselves (or possibly were "
                    "all given patient_idnum.id = 0). "
                    "Fix this by running, on the source database:\n\n"
                    "    UPDATE patient_idnum SET id = _pk;\n\n",
                    t=trcon.tablename,
                    n=NUMBER_OF_IDNUMS_DEFUNCT,
                )
            # Print the actual instance last; accessing them via pformat can
            # lead to crashes if there are missing source fields, as an
            # on-demand SELECT is executed sometimes (e.g. when a PatientIdNum
            # is printed, its Patient is selected, including the [user]
            # 'fullname' attribute that is absent in old databases).
            # Not a breaking point, since we're going to crash anyway, but
            # inelegant.
            # Since lazy loading (etc.) is configured at query time, the best
            # thing (as per Michael Bayer) is to detach the object from the
            # session:
            # https://groups.google.com/forum/#!topic/sqlalchemy/X_wA8K97smE
            trcon.src_session.expunge(oldobj)  # prevent implicit queries
            # Then all should work:
            log_warning_srcobj(oldobj)
            log.critical(
                "Existing record(s) in destination DB was/were:\n\n"
                "{}\n\n",
                pformat(existing_rec),
            )
            raise ValueError("Attempt to insert duplicate record; see log "
                             "message above.")