예제 #1
0
def process_nonpatient_table(table: Table, engine: Engine,
                             progargs: Any) -> None:
    """
    Give a non-patient table a CRATE integer PK column plus a unique index
    on it. Does nothing in RCEP mode.

    Args:
        table: an SQLAlchemy Table to process
        engine: an SQLAlchemy Engine
        progargs: program arguments; reads ``rcep`` and ``print``
    """
    if progargs.rcep:
        return
    log.info("Preprocessing non-patient table {}".format(repr(table.name)))
    effective_pk = get_effective_int_pk_col(table)
    preexisting_pk = effective_pk if effective_pk != CRATE_COL_PK else None
    if preexisting_pk:
        # The table brings its own PK; we'll copy its values across below,
        # so the new column must start out nullable.
        new_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True)
    else:
        # No usable PK: add an autoincrement column that populates itself.
        new_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK,
                                                      engine.dialect)
    # SQL Server requires Table-bound columns in order to generate DDL:
    table.append_column(new_pk_col)
    add_columns(engine, table, [new_pk_col])
    if not progargs.print:
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[CRATE_COL_PK])
    if preexisting_pk:
        # Copy the pre-existing PK into the CRATE PK column where unset.
        update_sql = """
            UPDATE {tablename} SET {crate_pk} = {rio_pk}
            WHERE {crate_pk} IS NULL
        """.format(tablename=table.name,
                   crate_pk=CRATE_COL_PK,
                   rio_pk=preexisting_pk)
        execute(engine, update_sql)
    add_indexes(engine, table, [
        {'index_name': CRATE_IDX_PK, 'column': CRATE_COL_PK, 'unique': True},
    ])
예제 #2
0
 def add_indexes(self, engine: Engine, metadata: MetaData) -> None:
     """
     Create all previously requested indexes in the database, one table at
     a time, via the module-level ``add_indexes`` helper.

     Args:
         engine: SQLAlchemy database Engine
         metadata: SQLAlchemy MetaData reflecting the database's tables
     """
     for tablename, tabledict in self._index_requests.items():
         # Table names in the metadata may differ in case from the
         # requested name, so match case-insensitively.
         tablename_casematch = get_case_insensitive_dict_key(
             metadata.tables, tablename)
         if not tablename_casematch:
             log.warning("add_indexes: Skipping index as table {} "
                         "absent".format(tablename))
             continue
         table = metadata.tables[tablename_casematch]
         # The dict values are the index-request dicts; the keys (index
         # names) aren't needed by the helper. (Previously built via a
         # manual append loop, and built even for tables skipped above.)
         add_indexes(engine, table, list(tabledict.values()))
예제 #3
0
def process_table(table: Table, engine: Engine,
                  configoptions: PcmisConfigOptions) -> None:
    """
    Processes a PCMIS table by checking it has appropriate columns, perhaps
    adding a CRATE integer PK, and indexing it.

    Args:
        table: an SQLAlchemy Table to process
        engine: an SQLAlchemy Engine
        configoptions: an instance of :class:`PcmisConfigOptions`
    """
    tablename = table.name
    column_names = table.columns.keys()
    log.debug(f"TABLE: {tablename}; COLUMNS: {column_names}")

    existing_pk_cols = get_pk_colnames(table)
    assert len(existing_pk_cols) < 2, (
        f"Table {tablename} has >1 PK column; don't know what to do")
    if existing_pk_cols and not get_effective_int_pk_col(table):
        raise ValueError(f"Table {table!r} has a non-integer PK")
    needs_crate_pk = not existing_pk_cols

    required_cols = [] if configoptions.print_sql_only else [CRATE_COL_PK]

    if configoptions.drop_not_create:
        # DROP STUFF! Opposite order to creation, below.
        drop_indexes(engine, table, [CRATE_IDX_PK])
        drop_columns(engine, table, [CRATE_COL_PK])
        return

    # CREATE STUFF!
    if needs_crate_pk:
        # SQL Server requires Table-bound columns in order to generate DDL:
        new_pk = make_bigint_autoincrement_column(CRATE_COL_PK,
                                                  engine.dialect)
        table.append_column(new_pk)
        add_columns(engine, table, [new_pk])
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)
    add_indexes(engine, table, [
        {'index_name': CRATE_IDX_PK,
         'column': CRATE_COL_PK,
         'unique': True},
    ])
예제 #4
0
파일: ddhint.py 프로젝트: Phdmani/crate
    def create_indexes(self, engine: Engine, metadata: MetaData) -> None:
        """
        Creates indexes in the **source** database according to instructions we
        have previously received via :func:`add_source_index_request` and/or
        :func:`add_bulk_source_index_request`.

        Args:
            engine: SQLAlchemy database Engine
            metadata: SQLAlchemy ORM Metadata
        """
        for tablename, tabledict in self._index_requests.items():
            # Table names in the metadata may differ in case from the
            # requested name, so match case-insensitively.
            tablename_casematch = get_case_insensitive_dict_key(
                metadata.tables, tablename)
            if not tablename_casematch:
                # BUG FIX: message previously said "add_indexes:",
                # misattributing the warning to a different function.
                log.warning(
                    f"create_indexes: Skipping index as table {tablename} "
                    f"absent")
                continue
            table = metadata.tables[tablename_casematch]
            # The dict values are the index-request dicts; the keys (index
            # names) aren't needed by the helper.
            indexdictlist = list(tabledict.values())  # type: List[Dict[str, Any]]
            add_indexes(engine, table, indexdictlist)
예제 #5
0
def process_table(table: Table, engine: Engine, progargs: Any) -> None:
    """
    Ensure a table has a usable integer PK, adding a CRATE autoincrement PK
    and a unique index on it where necessary; or, in drop mode, remove the
    CRATE PK column/index again.

    Args:
        table: an SQLAlchemy Table to process
        engine: an SQLAlchemy Engine
        progargs: program arguments; reads ``print`` and ``drop_danger_drop``
    """
    tablename = table.name
    column_names = table.columns.keys()
    log.debug("TABLE: {}; COLUMNS: {}".format(tablename, column_names))

    existing_pk_cols = get_pk_colnames(table)
    assert len(existing_pk_cols) < 2, (
        "Table {} has >1 PK column; don't know what to do".format(tablename))
    if existing_pk_cols and not get_effective_int_pk_col(table):
        raise ValueError("Table {} has a non-integer PK".format(repr(table)))
    needs_crate_pk = not existing_pk_cols

    required_cols = [] if progargs.print else [CRATE_COL_PK]

    if progargs.drop_danger_drop:
        # DROP STUFF! Opposite order to creation, below.
        drop_indexes(engine, table, [CRATE_IDX_PK])
        drop_columns(engine, table, [CRATE_COL_PK])
        return

    # CREATE STUFF!
    if needs_crate_pk:
        # SQL Server requires Table-bound columns in order to generate DDL:
        new_pk = make_bigint_autoincrement_column(CRATE_COL_PK,
                                                  engine.dialect)
        table.append_column(new_pk)
        add_columns(engine, table, [new_pk])
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)
    add_indexes(engine, table, [
        {'index_name': CRATE_IDX_PK,
         'column': CRATE_COL_PK,
         'unique': True},
    ])
예제 #6
0
def process_clindocs_table(table: Table, engine: Engine,
                           progargs: Any) -> None:
    """
    Preprocess a clinical-documents table: add CRATE columns recording the
    maximum RevisionID per document and whether each row is the current
    version, then populate both and index them.

    Args:
        table: an SQLAlchemy Table to process
        engine: an SQLAlchemy Engine
        progargs: program arguments; only ``progargs.print`` is read here
    """
    # For RiO only, not RCEP
    # Nullable, because we populate them after creation:
    crate_col_max_docver = Column(CRATE_COL_MAX_DOCVER, Integer, nullable=True)
    crate_col_last_doc = Column(CRATE_COL_LAST_DOC, Integer, nullable=True)
    # SQL Server requires Table-bound columns in order to generate DDL:
    table.append_column(crate_col_max_docver)
    table.append_column(crate_col_last_doc)
    add_columns(engine, table, [crate_col_max_docver, crate_col_last_doc])
    add_indexes(engine, table, [
        {
            # Joint index, for the JOIN in the first UPDATE below:
            'index_name':
            CRATE_IDX_RIONUM_SERIALNUM,
            'column':
            '{rio_number}, SerialNumber'.format(
                rio_number=CRATE_COL_RIO_NUMBER),
        },
        {
            # Speeds up the WHERE ... IS NULL scans below:
            'index_name': CRATE_IDX_MAX_DOCVER,
            'column': CRATE_COL_MAX_DOCVER,
        },
        {
            'index_name': CRATE_IDX_LAST_DOC,
            'column': CRATE_COL_LAST_DOC,
        },
    ])

    required_cols = ["SerialNumber", "RevisionID"]
    if not progargs.print:
        # The CRATE columns must genuinely exist unless we're only
        # printing SQL.
        required_cols.extend(
            [CRATE_COL_MAX_DOCVER, CRATE_COL_LAST_DOC, CRATE_COL_RIO_NUMBER])
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)

    # Find the maximum RevisionID for each document (i.e. per RiO number +
    # SerialNumber), and store it. Slow query, even with index.
    log.info("Clinical documents table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_MAX_DOCVER)))
    execute(
        engine, """
        UPDATE p1
        SET p1.{max_docver_col} = subq.max_docver
        FROM {tablename} p1 JOIN (
            SELECT {rio_number}, SerialNumber, MAX(RevisionID) AS max_docver
            FROM {tablename} p2
            GROUP BY {rio_number}, SerialNumber
        ) subq
        ON subq.{rio_number} = p1.{rio_number}
        AND subq.SerialNumber = p1.SerialNumber
        WHERE p1.{max_docver_col} IS NULL
    """.format(
            max_docver_col=CRATE_COL_MAX_DOCVER,
            tablename=table.name,
            rio_number=CRATE_COL_RIO_NUMBER,
        ))

    # Set a single flag column accordingly: 1 if the row is the latest
    # version of its document, else 0.
    log.info("Clinical documents table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_LAST_DOC)))
    execute(
        engine, """
        UPDATE {tablename} SET
            {last_doc_col} =
                CASE
                    WHEN RevisionID = {max_docver_col} THEN 1
                    ELSE 0
                END
        WHERE {last_doc_col} IS NULL
    """.format(
            tablename=table.name,
            last_doc_col=CRATE_COL_LAST_DOC,
            max_docver_col=CRATE_COL_MAX_DOCVER,
        ))
예제 #7
0
def process_progress_notes(table: Table, engine: Engine,
                           progargs: Any) -> None:
    """
    Preprocess a progress-notes table: add CRATE columns recording the
    maximum SubNum per note and whether each row is the current version of
    its note, populate both, and index them. For CPFT RCEP databases, also
    create a view restricted to current notes not entered in error.

    Args:
        table: an SQLAlchemy Table to process
        engine: an SQLAlchemy Engine
        progargs: program arguments; reads ``print``, ``rcep``, ``cpft``
    """
    # Nullable, because we populate them after creation:
    crate_col_max_subnum = Column(CRATE_COL_MAX_SUBNUM, Integer, nullable=True)
    crate_col_last_note = Column(CRATE_COL_LAST_NOTE, Integer, nullable=True)
    # SQL Server requires Table-bound columns in order to generate DDL:
    table.append_column(crate_col_max_subnum)
    table.append_column(crate_col_last_note)
    add_columns(engine, table, [crate_col_max_subnum, crate_col_last_note])
    # We're always in "RiO land", not "RCEP land", for this one.
    add_indexes(engine, table, [
        {  # Joint index, for JOIN in UPDATE statement below
            'index_name': CRATE_IDX_RIONUM_NOTENUM,
            'column': '{rio_number}, NoteNum'.format(
                rio_number=CRATE_COL_RIO_NUMBER),
        },
        {  # Speeds up WHERE below. (Much, much faster for second run.)
            'index_name': CRATE_IDX_MAX_SUBNUM,
            'column': CRATE_COL_MAX_SUBNUM,
        },
        {  # Speeds up WHERE below. (Much, much faster for second run.)
            'index_name': CRATE_IDX_LAST_NOTE,
            'column': CRATE_COL_LAST_NOTE,
        },
    ])

    # BUG FIX: "EnteredInError" was previously listed twice (copy-paste
    # duplicate); harmless but redundant, so the duplicate is removed.
    ensure_columns_present(
        engine,
        tablename=table.name,
        column_names=["NoteNum", "SubNum", "EnteredInError"])
    if not progargs.print:
        # The CRATE columns must genuinely exist unless we're only
        # printing SQL.
        ensure_columns_present(engine,
                               tablename=table.name,
                               column_names=[
                                   CRATE_COL_MAX_SUBNUM, CRATE_COL_LAST_NOTE,
                                   CRATE_COL_RIO_NUMBER
                               ])

    # Find the maximum SubNum for each note, and store it.
    # Slow query, even with index.
    log.info("Progress notes table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_MAX_SUBNUM)))
    execute(
        engine, """
        UPDATE p1
        SET p1.{max_subnum_col} = subq.max_subnum
        FROM {tablename} p1 JOIN (
            SELECT {rio_number}, NoteNum, MAX(SubNum) AS max_subnum
            FROM {tablename} p2
            GROUP BY {rio_number}, NoteNum
        ) subq
        ON subq.{rio_number} = p1.{rio_number}
        AND subq.NoteNum = p1.NoteNum
        WHERE p1.{max_subnum_col} IS NULL
    """.format(
            max_subnum_col=CRATE_COL_MAX_SUBNUM,
            tablename=table.name,
            rio_number=CRATE_COL_RIO_NUMBER,
        ))

    # Set a single flag column accordingly: 1 if the row is the latest
    # version of its note, else 0.
    log.info("Progress notes table {}: updating {}".format(
        repr(table.name), repr(CRATE_COL_LAST_NOTE)))
    execute(
        engine, """
        UPDATE {tablename} SET
            {last_note_col} =
                CASE
                    WHEN SubNum = {max_subnum_col} THEN 1
                    ELSE 0
                END
        WHERE {last_note_col} IS NULL
    """.format(
            tablename=table.name,
            last_note_col=CRATE_COL_LAST_NOTE,
            max_subnum_col=CRATE_COL_MAX_SUBNUM,
        ))

    # Create a view, if we're on an RCEP database
    if progargs.rcep and progargs.cpft:
        select_sql = """
            SELECT *
            FROM {tablename}
            WHERE
                (EnteredInError <> 1 OR EnteredInError IS NULL)
                AND {last_note_col} = 1
        """.format(
            tablename=table.name,
            last_note_col=CRATE_COL_LAST_NOTE,
        )
        create_view(engine, VIEW_RCEP_CPFT_PROGRESS_NOTES_CURRENT, select_sql)
예제 #8
0
def process_patient_table(table: Table, engine: Engine, progargs: Any) -> None:
    """
    Preprocess a patient table (RiO or RCEP format): add a CRATE integer PK
    column and an integer RiO-number column, populate them from the table's
    existing identifiers, and index both.

    Args:
        table: an SQLAlchemy Table to process
        engine: an SQLAlchemy Engine
        progargs: program arguments; reads ``print`` (and whatever
            :func:`table_is_rio_type` consults)
    """
    log.info("Preprocessing patient table: {}".format(repr(table.name)))
    rio_type = table_is_rio_type(table.name, progargs)
    if rio_type:
        pk_col = get_effective_int_pk_col(table)
        # Any pre-existing integer PK (other than the CRATE one) that we
        # can copy values from:
        rio_pk = pk_col if pk_col != CRATE_COL_PK else None
        string_pt_id = get_rio_patient_id_col(table)
        required_cols = [string_pt_id]
    else:  # RCEP type
        rio_pk = None
        required_cols = [RCEP_COL_PATIENT_ID]
        string_pt_id = RCEP_COL_PATIENT_ID
    if not progargs.print:
        # The CRATE columns must genuinely exist unless we're only
        # printing SQL.
        required_cols.extend([CRATE_COL_PK, CRATE_COL_RIO_NUMBER])

    # -------------------------------------------------------------------------
    # Add pk and rio_number columns, if not present
    # -------------------------------------------------------------------------
    if rio_type and rio_pk is not None:
        crate_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True)
        # ... can't do NOT NULL; need to populate it
        required_cols.append(rio_pk)
    else:  # RCEP type, or no PK in RiO
        crate_pk_col = make_bigint_autoincrement_column(
            CRATE_COL_PK, engine.dialect)
        # ... autopopulates
    crate_rio_number_col = Column(CRATE_COL_RIO_NUMBER,
                                  BigInteger,
                                  nullable=True)
    # ... even if RiO numbers are INT, they come from VARCHAR(15) here, and
    # that can (and does) look numeric and overflow an INT.
    # SQL Server requires Table-bound columns in order to generate DDL:
    table.append_column(crate_pk_col)
    table.append_column(crate_rio_number_col)
    add_columns(engine, table, [crate_pk_col, crate_rio_number_col])

    # -------------------------------------------------------------------------
    # Update pk and rio_number values, if not NULL
    # -------------------------------------------------------------------------
    ensure_columns_present(engine,
                           tablename=table.name,
                           column_names=required_cols)
    # SQL fragment converting the string patient-ID column to an integer:
    cast_id_to_int = sql_fragment_cast_to_int(string_pt_id,
                                              dialect=engine.dialect)
    if rio_type and rio_pk:
        log.info("Table {}: updating columns {} and {}".format(
            repr(table.name), repr(CRATE_COL_PK), repr(CRATE_COL_RIO_NUMBER)))
        execute(
            engine, """
            UPDATE {tablename} SET
                {crate_pk} = {rio_pk},
                {crate_rio_number} = {cast_id_to_int}
            WHERE
                {crate_pk} IS NULL
                OR {crate_rio_number} IS NULL
        """.format(
                tablename=table.name,
                crate_pk=CRATE_COL_PK,
                rio_pk=rio_pk,
                crate_rio_number=CRATE_COL_RIO_NUMBER,
                cast_id_to_int=cast_id_to_int,
            ))
    else:
        # RCEP format, or RiO with no PK
        # crate_pk is autogenerated as an INT IDENTITY field
        log.info("Table {}: updating column {}".format(
            repr(table.name), repr(CRATE_COL_RIO_NUMBER)))
        execute(
            engine,
            """
            UPDATE {tablename} SET
                {crate_rio_number} = {cast_id_to_int}
            WHERE
                {crate_rio_number} IS NULL
        """.format(  # noqa
                tablename=table.name,
                crate_rio_number=CRATE_COL_RIO_NUMBER,
                cast_id_to_int=cast_id_to_int,
            ))
    # -------------------------------------------------------------------------
    # Add indexes, if absent
    # -------------------------------------------------------------------------
    # Note that the indexes are unlikely to speed up the WHERE NOT NULL search
    # above, so it doesn't matter that we add these last. Their use is for
    # the subsequent CRATE anonymisation table scans.
    add_indexes(engine, table, [
        {
            'index_name': CRATE_IDX_PK,
            'column': CRATE_COL_PK,
            'unique': True,
        },
        {
            'index_name': CRATE_IDX_RIONUM,
            'column': CRATE_COL_RIO_NUMBER,
        },
    ])