Exemple #1
0
def drop_pcmis_views(engine: Engine, metadata: MetaData,
                     configoptions: PcmisConfigOptions,
                     ddhint: DDHint) -> None:  # ddhint modified
    """
    Drops all PCMIS views.

    Args:
        engine: an SQLAlchemy Engine
        metadata: SQLAlchemy MetaData containing reflected details of database
        configoptions: an instance of :class:`PcmisConfigOptions`
        ddhint: a :class:`crate_anon/preprocess/ddhint.DDHint`, which will be
            modified
    """
    views = get_pcmis_views(engine, configoptions, ddhint)
    ddhint.drop_indexes(engine, metadata)
    for viewmaker in views:
        viewmaker.drop_view(engine)
Exemple #2
0
def get_rio_views(engine: Engine,
                  progargs: None,
                  ddhint: DDHint,
                  suppress_basetables: bool = True,
                  suppress_lookup: bool = True) -> List[ViewMaker]:
    # ddhint modified
    views = []  # type: List[ViewMaker]
    all_tables_lower = get_table_names(engine, to_lower=True)
    all_views_lower = get_view_names(engine, to_lower=True)
    all_selectables_lower = list(set(all_tables_lower + all_views_lower))
    for viewname, viewdetails in RIO_VIEWS.items():
        basetable = viewdetails['basetable']
        if basetable.lower() not in all_selectables_lower:
            log.warning("Skipping view {} as base table/view {} not "
                        "present".format(viewname, basetable))
            continue
        suppress_basetable = viewdetails.get('suppress_basetable',
                                             suppress_basetables)
        suppress_other_tables = viewdetails.get('suppress_other_tables', [])
        if suppress_basetable:
            ddhint.suppress_table(basetable)
        ddhint.suppress_tables(suppress_other_tables)
        rename = viewdetails.get('rename', None)
        enforce_same_n_rows_as_base = viewdetails.get(
            'enforce_same_n_rows_as_base', True)
        # noinspection PyTypeChecker
        viewmaker = ViewMaker(
            viewname=viewname,
            engine=engine,
            basetable=basetable,
            rename=rename,
            progargs=progargs,
            enforce_same_n_rows_as_base=enforce_same_n_rows_as_base)
        if 'add' in viewdetails:
            for addition in viewdetails['add']:
                func = addition['function']
                kwargs = addition.get('kwargs', {})
                kwargs['viewmaker'] = viewmaker
                func(**kwargs)  # will alter viewmaker
        if progargs.audit_info:
            rio_add_audit_info(viewmaker)  # will alter viewmaker
        if suppress_lookup:
            ddhint.suppress_tables(viewmaker.get_lookup_tables())
        ddhint.add_bulk_source_index_request(
            viewmaker.get_index_request_dict())
        views.append(viewmaker)
    return views
Exemple #3
0
def main() -> None:
    """
    Command-line parser. See command-line help.
    """
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=RawDescriptionArgumentDefaultsHelpFormatter,
        description="Alters a PCMIS database to be suitable for CRATE.")
    parser.add_argument("--url", required=True, help="SQLAlchemy database URL")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose")
    parser.add_argument(
        "--print",
        action="store_true",
        help="Print SQL but do not execute it. (You can redirect the printed "
        "output to create an SQL script.")
    parser.add_argument("--echo", action="store_true", help="Echo SQL")

    parser.add_argument(
        "--drop-danger-drop",
        action="store_true",
        help="REMOVES new columns and indexes, rather than creating them. "
        "(There's not very much danger; no real information is lost, but "
        "it might take a while to recalculate it.)")

    parser.add_argument(
        "--debug-skiptables",
        action="store_true",
        help="DEBUG-ONLY OPTION. Skip tables (view creation only)")

    parser.add_argument(
        "--postcodedb",
        help='Specify database (schema) name for ONS Postcode Database (as '
        'imported by CRATE) to link to addresses as a view. With SQL '
        'Server, you will have to specify the schema as well as the '
        'database; e.g. "--postcodedb ONS_PD.dbo"')
    parser.add_argument(
        "--geogcols",
        nargs="*",
        default=DEFAULT_GEOG_COLS,
        help="List of geographical information columns to link in from ONS "
        "Postcode Database. BEWARE that you do not specify anything too "
        "identifying.")

    parser.add_argument(
        "--settings-filename",
        help="Specify filename to write draft ddgen_* settings to, for use in "
        "a CRATE anonymiser configuration file.")

    progargs = parser.parse_args()

    rootlogger = logging.getLogger()
    configure_logger_for_colour(
        rootlogger, level=logging.DEBUG if progargs.verbose else logging.INFO)

    log.info("CRATE in-place preprocessor for PCMIS databases")
    safeargs = {k: v for k, v in vars(progargs).items() if k != 'url'}
    log.debug(f"args (except url): {repr(safeargs)}")

    if progargs.postcodedb and not progargs.geogcols:
        raise ValueError(
            "If you specify postcodedb, you must specify some geogcols")

    set_print_not_execute(progargs.print)

    hack_in_mssql_xml_type()

    engine = create_engine(progargs.url, echo=progargs.echo, encoding=CHARSET)
    metadata = MetaData()
    metadata.bind = engine
    log.info(f"Database: {repr(engine.url)}")  # ... repr hides p/w
    log.debug(f"Dialect: {engine.dialect.name}")

    log.info("Reflecting (inspecting) database...")
    metadata.reflect(engine)
    log.info("... inspection complete")

    ddhint = DDHint()
    configoptions = PcmisConfigOptions(
        postcodedb=progargs.postcodedb,
        geogcols=progargs.geogcols,
        print_sql_only=progargs.print,
        drop_not_create=progargs.drop_danger_drop,
    )

    if progargs.drop_danger_drop:
        # Drop views (and view-induced table indexes) first
        drop_pcmis_views(engine, metadata, configoptions, ddhint)
        if not progargs.debug_skiptables:
            process_all_tables(engine, metadata, configoptions)
    else:
        # Tables first, then views
        if not progargs.debug_skiptables:
            process_all_tables(engine, metadata, configoptions)
        create_pcmis_views(engine, metadata, configoptions, ddhint)

    if progargs.settings_filename:
        with open(progargs.settings_filename, 'w') as f:
            print(get_pcmis_dd_settings(ddhint), file=f)
Exemple #4
0
def get_pcmis_dd_settings(ddhint: DDHint) -> str:
    """
    Draft CRATE config file settings that will allow CRATE to create a PCMIS
    data dictionary near-automatically.

    Args:
        ddhint: :class:`crate_anon.preprocess.ddhint.DDHint`

    Returns:
        the config file settings, as a string
    """
    return """
ddgen_omit_by_default = True

ddgen_omit_fields =

ddgen_include_fields = #
    # -------------------------------------------------------------------------
    # PCMIS core tables
    # -------------------------------------------------------------------------
    Lexicon*.*  # system lookup tables
    Lookups.*  # system lookup table
    lu*.*  # system lookup tables
    System*.*  # system lookup tables
    Users.*  # staff
    # -------------------------------------------------------------------------
    # Custom views from CRATE
    # -------------------------------------------------------------------------
    Case*{CRATE_VIEW_SUFFIX}.*
    Group*{CRATE_VIEW_SUFFIX}.*
    Patient*{CRATE_VIEW_SUFFIX}.*
    ReferralDetails{CRATE_VIEW_SUFFIX}.*

ddgen_allow_no_patient_info = False

ddgen_per_table_pid_field = {PCMIS_COL_PATIENT_ID}

ddgen_add_per_table_pids_to_scrubber = False

ddgen_master_pid_fieldname = {PCMIS_COL_NHS_NUMBER}

ddgen_table_denylist = #
    # -------------------------------------------------------------------------
    # Denylist: Prefixes: groups of tables; individual tables
    # -------------------------------------------------------------------------
    aspnet_*  # admin tables
    CaseCarer*  # details of carers
    CaseChild*  # details of children
    CaseEmergency*  # emergency contacts
    CaseEmployer*  # employer details
    MissingData  # system?
    ODBC_*  # admin tables
    PatientCarer*  # details of carers
    PatientDetails  # replaced by {VIEW_PT_DETAIL_W_GEOG}
    PatientChild*  # details of children
    PatientEmergency*  # emergency contacts
    PatientEmployer*  # employer details
    pcmis_*  # admin tables
    # -------------------------------------------------------------------------
    # Denylist: CPFT custom
    # -------------------------------------------------------------------------
    # -------------------------------------------------------------------------
    # Denylist: Views supersede
    # Below here, we have other tables suppressed because CRATE's views offer
    # more comprehensive alternatives
    # -------------------------------------------------------------------------
    {suppress_tables}

ddgen_table_allowlist =

ddgen_table_require_field_absolute = #
    # All tables/fields must have crate_pk
    {CRATE_COL_PK}

ddgen_table_require_field_conditional =
ddgen_field_denylist =
ddgen_field_allowlist =
ddgen_pk_fields = {CRATE_COL_PK}
ddgen_constant_content = False
ddgen_constant_content_tables =
ddgen_nonconstant_content_tables =
ddgen_addition_only = False
ddgen_addition_only_tables =
ddgen_deletion_possible_tables =

ddgen_pid_defining_fieldnames = {VIEW_PT_DETAIL_W_GEOG}.{PCMIS_COL_PATIENT_ID}

ddgen_scrubsrc_patient_fields = # several of these:
    # ----------------------------------------------------------------------
    # Original PCMIS tables (some may be superseded by views; list both here;
    # if the table is denylisted anyway, it doesn't matter).
    # We achieve "list both" by using *.
    # ----------------------------------------------------------------------
    CaseContactDetails*.CaseNumber
    CaseContactDetails*.FirstName
    CaseContactDetails*.MiddleName
    CaseContactDetails*.LastName
    CaseContactDetails*.DOB
    CaseContactDetails*.Address*
    CaseContactDetails*.TownCity
    CaseContactDetails*.County
    CaseContactDetails*.PostCode
    CaseContactDetails*.Tel*
    CaseContactDetails*.NHSNumber
    CaseContactDetails*.FamilyName
    CaseContactDetails*.PreviousName
    CaseContactDetails*.PreviousAddress*
    CaseContactDetails*.PreviousTownCity
    CaseContactDetails*.PreviousCounty
    CaseContactDetails*.PreviousPostCode
    CaseContactDetails*.Email
    CaseContactDetails*.Profession
    CaseContactDetails*.OtherCaseNumber
    CaseContactDetails*.NHSNumberVerified
    CaseContactDetails*.Voicemail*
    CaseContactDetails*.LastNameAlias
    CaseContactDetails*.FirstNameAlias
    CaseContactDetails*.DisplayName
    CaseEpisodes*.LinkedCaseNumber
    PatientDetails*.PatientID
    PatientDetails*.FirstName
    PatientDetails*.MiddleName
    PatientDetails*.LastName
    PatientDetails*.DOB
    PatientDetails*.Address*  # Address1, Address2, Address3
    PatientDetails*.TownCity
    PatientDetails*.County
    PatientDetails*.PostCode
    PatientDetails*.Tel*  # TelHome, TelMobile, TelWork
    PatientDetails*.NHSNumber
    PatientDetails*.FamilyName
    PatientDetails*.PreviousName
    PatientDetails*.PreviousAddress*
    PatientDetails*.PreviousTownCity
    PatientDetails*.PreviousCounty
    PatientDetails*.PreviousPostCode
    PatientDetails*.Email
    PatientDetails*.NHSNumberVerified
    PatientDetails*.Voicemail*
    PatientDetails*.LastNameAlias
    PatientDetails*.FirstNameAlias
    PatientDetails*.DisplayName
    # ----------------------------------------------------------------------
    # Views
    # ----------------------------------------------------------------------

ddgen_scrubsrc_thirdparty_fields = # several:
    # ----------------------------------------------------------------------
    # Original PCMIS tables (some may be superseded by views; list both here)
    # ----------------------------------------------------------------------
    CaseContactDetails*.DependantChildren  # is VARCHAR(100)
    CaseContactDetails*.ChildDetails*
    CaseContactDetails*.CarerDetails*
    CaseCarerDetails*.CarerName
    CaseCarerDetails*.CarerTel*
    CaseCarerDetails*.CarerAddress*
    CaseCarerDetails*.CarerTownCity
    CaseCarerDetails*.CarerCounty
    CaseCarerDetails*.CarerPostcode
    CaseChildDetails*.ChildCarer  # NVARCHAR(50)
    CaseChildDetails*.FirstName
    CaseChildDetails*.MiddleName
    CaseChildDetails*.LastName
    CaseChildDetails*.DOB
    CaseEmergencyDetails*.NextOfKin
    CaseEmergencyDetails*.EmergencyContact
    CaseEmergencyDetails*.EmergencyAddress*
    CaseEmergencyDetails*.EmergencyTownCity
    CaseEmergencyDetails*.EmergencyCounty
    CaseEmergencyDetails*.EmergencyPostcode
    CaseEmergencyDetails*.EmergencyTelephone
    CaseEmployerDetails*.EmployerName
    CaseEmployerDetails*.EmployerJobTitle
    CaseEmployerDetails*.EmployerContact
    CaseEmployerDetails*.EmployerAddress*
    CaseEmployerDetails*.EmployerTownCity
    CaseEmployerDetails*.EmployerCounty
    CaseEmployerDetails*.EmployerPostcode
    CaseEmployerDetails*.EmployerTelephone
    PatientCarerDetails*.CarerName
    PatientCarerDetails*.CarerTel*
    PatientCarerDetails*.CarerAddress*
    PatientCarerDetails*.CarerTownCity
    PatientCarerDetails*.CarerCounty
    PatientCarerDetails*.CarerPostCode
    PatientChildDetails*.ChildCarer  # VARCHAR(50)
    PatientChildDetails*.FirstName
    PatientChildDetails*.MiddleName
    PatientChildDetails*.LastName
    PatientChildDetails*.DOB
    PatientDetails*.DependantChildren  # is VARCHAR(100)
    PatientEmergencyDetails*.NextOfKin
    PatientEmergencyDetails*.EmergencyContact
    PatientEmergencyDetails*.EmergencyAddress*
    PatientEmergencyDetails*.EmergencyTownCity
    PatientEmergencyDetails*.EmergencyCounty
    PatientEmergencyDetails*.EmergencyPostcode
    PatientEmergencyDetails*.EmergencyTelephone
    PatientEmployerDetails*.EmployerName
    PatientEmployerDetails*.EmployerJobTitle
    PatientEmployerDetails*.EmployerContact
    PatientEmployerDetails*.EmployerAddress*
    PatientEmployerDetails*.EmployerTownCity
    PatientEmployerDetails*.EmployerCounty
    PatientEmployerDetails*.EmployerPostcode
    PatientEmployerDetails*.EmployerTelephone
    # ----------------------------------------------------------------------
    # CRATE views
    # ----------------------------------------------------------------------

ddgen_scrubsrc_thirdparty_xref_pid_fields =

ddgen_required_scrubsrc_fields = # several:
    PatientDetails{CRATE_VIEW_SUFFIX}.FirstName
    PatientDetails{CRATE_VIEW_SUFFIX}.LastName  # always present, but FamilyName can be NULL
    PatientDetails{CRATE_VIEW_SUFFIX}.DOB

ddgen_scrubmethod_code_fields = # note: case-insensitive matching:
    *PostCode

ddgen_scrubmethod_date_fields =
    *DOB*

ddgen_scrubmethod_number_fields = #
    *Tel*
    *Voicemail*
    *NHSNumber*

ddgen_scrubmethod_phrase_fields = *Address*

ddgen_safe_fields_exempt_from_scrubbing =

    # PCMIS mostly uses string column lengths of 1, 20, 32, 50, 64, 100, 128,
    # 200, 250, 255, 256, 500, 1000, 2000, 4000, unlimited.
    # So what length is the minimum for "free text"?
    # - 20: mostly postcodes, lookup codes
    # - 32: telephone numbers
    # - 50: includes CaseAssessmentContactType.Purpose, plus lookup codes.
    #       Also includes CaseChildDetails.Impact
    # - 64: mostly codes; also e.g. ReferralDetails.EndOfCareReason
    # - 100: lots of generic things, like CaseAssessmentCustom1.Q1
ddgen_min_length_for_scrubbing = 50

ddgen_truncate_date_fields =
    CaseContactDetails.DOB
    PatientDetails.DOB

ddgen_filename_to_text_fields =
ddgen_binary_to_text_field_pairs =
ddgen_skip_row_if_extract_text_fails_fields =
ddgen_rename_tables_remove_suffixes = {CRATE_VIEW_SUFFIX}
ddgen_patient_opt_out_fields =

ddgen_extra_hash_fields = CaseNumber, pcmis_case_number_hashdef

    # YOU WILL NEED TO DO THIS:
    # (1) add "pcmis_case_number_hashdef" to your "extra_hash_config_sections"
    #     setting;
    # (2) add a "pcmis_case_number_hashdef" section, like this:
    #       [pcmis_case_number_hashdef]
    #       hash_method = HMAC_MD5
    #       secret_key = my_special_secret_phrase_123
    # and obviously you should use your own secret phrase, not this one!

ddgen_index_fields =
    {PCMIS_COL_CASE_NUMBER}
    {PCMIS_COL_CONTACT_NUMBER}
    GroupCode

ddgen_allow_fulltext_indexing = True

ddgen_force_lower_case = False
ddgen_convert_odd_chars_to_underscore = True

    """.format(  # noqa
        CRATE_COL_PK=CRATE_COL_PK,
        CRATE_VIEW_SUFFIX=CRATE_VIEW_SUFFIX,
        PCMIS_COL_CASE_NUMBER=PCMIS_COL_CASE_NUMBER,
        PCMIS_COL_CONTACT_NUMBER=PCMIS_COL_CONTACT_NUMBER,
        PCMIS_COL_NHS_NUMBER=PCMIS_COL_NHS_NUMBER,
        PCMIS_COL_PATIENT_ID=PCMIS_COL_PATIENT_ID,
        PCMIS_TABLE_MASTER_PATIENT=PCMIS_TABLE_MASTER_PATIENT,
        suppress_tables="\n    ".join(ddhint.get_suppressed_tables()),
        VIEW_PT_DETAIL_W_GEOG=VIEW_PT_DETAIL_W_GEOG,
    )
Exemple #5
0
def drop_pcmis_views(engine: Engine, metadata: MetaData, progargs: Any,
                     ddhint: DDHint) -> None:  # ddhint modified
    views = get_pcmis_views(engine, progargs, ddhint)
    ddhint.drop_indexes(engine, metadata)
    for viewmaker in views:
        viewmaker.drop_view(engine)
Exemple #6
0
def create_pcmis_views(engine: Engine, metadata: MetaData, progargs: Any,
                       ddhint: DDHint) -> None:  # ddhint modified
    views = get_pcmis_views(engine, progargs, ddhint)
    for viewmaker in views:
        viewmaker.create_view(engine)
    ddhint.add_indexes(engine, metadata)
Exemple #7
0
def main() -> None:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=r"""
*   Alters a RiO database to be suitable for CRATE.

*   By default, this treats the source database as being a copy of a RiO
    database (slightly later than version 6.2; exact version unclear).
    Use the "--rcep" (+/- "--cpft") switch(es) to treat it as a
    Servelec RiO CRIS Extract Program (RCEP) v2 output database.
    """)  # noqa
    parser.add_argument("--url", required=True, help="SQLAlchemy database URL")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose")
    parser.add_argument(
        "--print",
        action="store_true",
        help="Print SQL but do not execute it. (You can redirect the printed "
        "output to create an SQL script.")
    parser.add_argument("--echo", action="store_true", help="Echo SQL")

    parser.add_argument(
        "--rcep",
        action="store_true",
        help="Treat the source database as the product of Servelec's RiO CRIS "
        "Extract Program v2 (instead of raw RiO)")
    parser.add_argument(
        "--drop-danger-drop",
        action="store_true",
        help="REMOVES new columns and indexes, rather than creating them. "
        "(There's not very much danger; no real information is lost, but "
        "it might take a while to recalculate it.)")
    parser.add_argument(
        "--cpft",
        action="store_true",
        help="Apply hacks for Cambridgeshire & Peterborough NHS Foundation "
        "Trust (CPFT) RCEP database. Only appicable with --rcep")

    parser.add_argument(
        "--debug-skiptables",
        action="store_true",
        help="DEBUG-ONLY OPTION. Skip tables (view creation only)")

    prog_curr_group = parser.add_mutually_exclusive_group()
    prog_curr_group.add_argument(
        "--prognotes-current-only",
        dest="prognotes_current_only",
        action="store_true",
        help="Progress_Notes view restricted to current versions only "
        "(* default)")
    prog_curr_group.add_argument(
        "--prognotes-all",
        dest="prognotes_current_only",
        action="store_false",
        help="Progress_Notes view shows old versions too")
    parser.set_defaults(prognotes_current_only=True)

    clindocs_curr_group = parser.add_mutually_exclusive_group()
    clindocs_curr_group.add_argument(
        "--clindocs-current-only",
        dest="clindocs_current_only",
        action="store_true",
        help="Clinical_Documents view restricted to current versions only (*)")
    clindocs_curr_group.add_argument(
        "--clindocs-all",
        dest="clindocs_current_only",
        action="store_false",
        help="Clinical_Documents view shows old versions too")
    parser.set_defaults(clindocs_current_only=True)

    allerg_curr_group = parser.add_mutually_exclusive_group()
    allerg_curr_group.add_argument(
        "--allergies-current-only",
        dest="allergies_current_only",
        action="store_true",
        help="Client_Allergies view restricted to current info only")
    allerg_curr_group.add_argument(
        "--allergies-all",
        dest="allergies_current_only",
        action="store_false",
        help="Client_Allergies view shows deleted allergies too (*)")
    parser.set_defaults(allergies_current_only=False)

    audit_group = parser.add_mutually_exclusive_group()
    audit_group.add_argument(
        "--audit-info",
        dest="audit_info",
        action="store_true",
        help="Audit information (creation/update times) added to views")
    audit_group.add_argument("--no-audit-info",
                             dest="audit_info",
                             action="store_false",
                             help="No audit information added (*)")
    parser.set_defaults(audit_info=False)

    parser.add_argument(
        "--postcodedb",
        help='Specify database (schema) name for ONS Postcode Database (as '
        'imported by CRATE) to link to addresses as a view. With SQL '
        'Server, you will have to specify the schema as well as the '
        'database; e.g. "--postcodedb ONS_PD.dbo"')
    parser.add_argument(
        "--geogcols",
        nargs="*",
        default=DEFAULT_GEOG_COLS,
        help="List of geographical information columns to link in from ONS "
        "Postcode Database. BEWARE that you do not specify anything too "
        "identifying. Default: {}".format(' '.join(DEFAULT_GEOG_COLS)))

    parser.add_argument(
        "--settings-filename",
        help="Specify filename to write draft ddgen_* settings to, for use in "
        "a CRATE anonymiser configuration file.")

    progargs = parser.parse_args()

    rootlogger = logging.getLogger()
    configure_logger_for_colour(
        rootlogger, level=logging.DEBUG if progargs.verbose else logging.INFO)

    progargs.rio = not progargs.rcep
    if progargs.rcep:
        # RCEP
        progargs.master_patient_table = RCEP_TABLE_MASTER_PATIENT
        if progargs.cpft:
            progargs.full_prognotes_table = CPFT_RCEP_TABLE_FULL_PROGRESS_NOTES
            # We (CPFT) may have a hacked-in copy of the RiO main progress
            # notes table added to the RCEP output database.
        else:
            progargs.full_prognotes_table = None
            # The RCEP does not export sufficient information to distinguish
            # current and non-current versions of progress notes.
    else:
        # RiO
        progargs.master_patient_table = RIO_TABLE_MASTER_PATIENT
        progargs.full_prognotes_table = RIO_TABLE_PROGRESS_NOTES

    log.info("CRATE in-place preprocessor for RiO or RiO CRIS Extract Program "
             "(RCEP) databases")
    safeargs = {k: v for k, v in vars(progargs).items() if k != 'url'}
    log.debug("args (except url): {}".format(repr(safeargs)))
    log.info("RiO mode" if progargs.rio else "RCEP mode")

    if progargs.postcodedb and not progargs.geogcols:
        raise ValueError(
            "If you specify postcodedb, you must specify some geogcols")

    set_print_not_execute(progargs.print)

    hack_in_mssql_xml_type()

    engine = create_engine(progargs.url, echo=progargs.echo, encoding=CHARSET)
    metadata = MetaData()
    metadata.bind = engine
    log.info("Database: {}".format(repr(engine.url)))  # ... repr hides p/w
    log.debug("Dialect: {}".format(engine.dialect.name))

    log.info("Reflecting (inspecting) database...")
    metadata.reflect(engine)
    log.info("... inspection complete")

    ddhint = DDHint()

    if progargs.drop_danger_drop:
        # Drop views (and view-induced table indexes) first
        if progargs.rio:
            drop_rio_views(engine, metadata, progargs, ddhint)
        drop_view(engine, VIEW_ADDRESS_WITH_GEOGRAPHY)
        if not progargs.debug_skiptables:
            process_all_tables(engine, metadata, progargs)
    else:
        # Tables first, then views
        if not progargs.debug_skiptables:
            process_all_tables(engine, metadata, progargs)
        if progargs.postcodedb:
            add_postcode_geography_view(engine, progargs, ddhint)
        if progargs.rio:
            create_rio_views(engine, metadata, progargs, ddhint)

    if progargs.settings_filename:
        with open(progargs.settings_filename, 'w') as f:
            print(get_rio_dd_settings(ddhint), file=f)
Exemple #8
0
def add_postcode_geography_view(engine: Engine, progargs: Any,
                                ddhint: DDHint) -> None:  # ddhint modified
    # Re-read column names, as we may have inserted some recently by hand that
    # may not be in the initial metadata.
    if progargs.rio:
        addresstable = RIO_TABLE_ADDRESS
        rio_postcodecol = RIO_COL_POSTCODE
    else:
        addresstable = RCEP_TABLE_ADDRESS
        rio_postcodecol = RCEP_COL_POSTCODE
    orig_column_names = get_column_names(engine,
                                         tablename=addresstable,
                                         sort=True)

    # Remove any original column names being overridden by new ones.
    # (Could also do this the other way around!)
    geogcols_lowercase = [x.lower() for x in progargs.geogcols]
    orig_column_names = [
        x for x in orig_column_names if x.lower() not in geogcols_lowercase
    ]

    orig_column_specs = [
        "{t}.{c}".format(t=addresstable, c=col) for col in orig_column_names
    ]
    geog_col_specs = [
        "{db}.{t}.{c}".format(db=progargs.postcodedb,
                              t=ONSPD_TABLE_POSTCODE,
                              c=col)
        for col in sorted(progargs.geogcols, key=lambda x: x.lower())
    ]
    overlap = set(orig_column_names) & set(progargs.geogcols)
    if overlap:
        raise ValueError("Columns overlap: address table contains columns {}; "
                         "geogcols = {}; overlap = {}".format(
                             orig_column_names, progargs.geogcols, overlap))
    ensure_columns_present(engine,
                           tablename=addresstable,
                           column_names=[rio_postcodecol])
    select_sql = """
        SELECT {origcols},
            {geogcols}
        FROM {addresstable}
        LEFT JOIN {pdb}.{pcdtab}
        ON {addresstable}.{rio_postcodecol} = {pdb}.{pcdtab}.pcds
        -- RCEP, and presumably RiO, appear to use the ONS pcds format, of
        -- 2-4 char outward code; space; 3-char inward code.
        -- If this fails, use this slower version:
        -- ON REPLACE({addresstable}.{rio_postcodecol},
        --            ' ',
        --            '') = {pdb}.{pcdtab}.pcd_nospace
    """.format(
        addresstable=addresstable,
        origcols=",\n            ".join(orig_column_specs),
        geogcols=",\n            ".join(geog_col_specs),
        pdb=progargs.postcodedb,
        pcdtab=ONSPD_TABLE_POSTCODE,
        rio_postcodecol=rio_postcodecol,
    )
    create_view(engine, VIEW_ADDRESS_WITH_GEOGRAPHY, select_sql)
    assert_view_has_same_num_rows(engine, addresstable,
                                  VIEW_ADDRESS_WITH_GEOGRAPHY)
    ddhint.suppress_table(addresstable)