def drop_pcmis_views(engine: Engine, metadata: MetaData, configoptions: PcmisConfigOptions, ddhint: DDHint) -> None: # ddhint modified """ Drops all PCMIS views. Args: engine: an SQLAlchemy Engine metadata: SQLAlchemy MetaData containing reflected details of database configoptions: an instance of :class:`PcmisConfigOptions` ddhint: a :class:`crate_anon/preprocess/ddhint.DDHint`, which will be modified """ views = get_pcmis_views(engine, configoptions, ddhint) ddhint.drop_indexes(engine, metadata) for viewmaker in views: viewmaker.drop_view(engine)
def get_rio_views(engine: Engine, progargs: None, ddhint: DDHint, suppress_basetables: bool = True, suppress_lookup: bool = True) -> List[ViewMaker]: # ddhint modified views = [] # type: List[ViewMaker] all_tables_lower = get_table_names(engine, to_lower=True) all_views_lower = get_view_names(engine, to_lower=True) all_selectables_lower = list(set(all_tables_lower + all_views_lower)) for viewname, viewdetails in RIO_VIEWS.items(): basetable = viewdetails['basetable'] if basetable.lower() not in all_selectables_lower: log.warning("Skipping view {} as base table/view {} not " "present".format(viewname, basetable)) continue suppress_basetable = viewdetails.get('suppress_basetable', suppress_basetables) suppress_other_tables = viewdetails.get('suppress_other_tables', []) if suppress_basetable: ddhint.suppress_table(basetable) ddhint.suppress_tables(suppress_other_tables) rename = viewdetails.get('rename', None) enforce_same_n_rows_as_base = viewdetails.get( 'enforce_same_n_rows_as_base', True) # noinspection PyTypeChecker viewmaker = ViewMaker( viewname=viewname, engine=engine, basetable=basetable, rename=rename, progargs=progargs, enforce_same_n_rows_as_base=enforce_same_n_rows_as_base) if 'add' in viewdetails: for addition in viewdetails['add']: func = addition['function'] kwargs = addition.get('kwargs', {}) kwargs['viewmaker'] = viewmaker func(**kwargs) # will alter viewmaker if progargs.audit_info: rio_add_audit_info(viewmaker) # will alter viewmaker if suppress_lookup: ddhint.suppress_tables(viewmaker.get_lookup_tables()) ddhint.add_bulk_source_index_request( viewmaker.get_index_request_dict()) views.append(viewmaker) return views
def main() -> None: """ Command-line parser. See command-line help. """ # noinspection PyTypeChecker parser = argparse.ArgumentParser( formatter_class=RawDescriptionArgumentDefaultsHelpFormatter, description="Alters a PCMIS database to be suitable for CRATE.") parser.add_argument("--url", required=True, help="SQLAlchemy database URL") parser.add_argument("-v", "--verbose", action="store_true", help="Verbose") parser.add_argument( "--print", action="store_true", help="Print SQL but do not execute it. (You can redirect the printed " "output to create an SQL script.") parser.add_argument("--echo", action="store_true", help="Echo SQL") parser.add_argument( "--drop-danger-drop", action="store_true", help="REMOVES new columns and indexes, rather than creating them. " "(There's not very much danger; no real information is lost, but " "it might take a while to recalculate it.)") parser.add_argument( "--debug-skiptables", action="store_true", help="DEBUG-ONLY OPTION. Skip tables (view creation only)") parser.add_argument( "--postcodedb", help='Specify database (schema) name for ONS Postcode Database (as ' 'imported by CRATE) to link to addresses as a view. With SQL ' 'Server, you will have to specify the schema as well as the ' 'database; e.g. "--postcodedb ONS_PD.dbo"') parser.add_argument( "--geogcols", nargs="*", default=DEFAULT_GEOG_COLS, help="List of geographical information columns to link in from ONS " "Postcode Database. BEWARE that you do not specify anything too " "identifying.") parser.add_argument( "--settings-filename", help="Specify filename to write draft ddgen_* settings to, for use in " "a CRATE anonymiser configuration file.") progargs = parser.parse_args() rootlogger = logging.getLogger() configure_logger_for_colour( rootlogger, level=logging.DEBUG if progargs.verbose else logging.INFO) log.info("CRATE in-place preprocessor for PCMIS databases") safeargs = {k: v for k, v in vars(progargs).items() if k != 'url'} log.debug(f"args (except url): {repr(safeargs)}") if progargs.postcodedb and not progargs.geogcols: raise ValueError( "If you specify postcodedb, you must specify some geogcols") set_print_not_execute(progargs.print) hack_in_mssql_xml_type() engine = create_engine(progargs.url, echo=progargs.echo, encoding=CHARSET) metadata = MetaData() metadata.bind = engine log.info(f"Database: {repr(engine.url)}") # ... repr hides p/w log.debug(f"Dialect: {engine.dialect.name}") log.info("Reflecting (inspecting) database...") metadata.reflect(engine) log.info("... inspection complete") ddhint = DDHint() configoptions = PcmisConfigOptions( postcodedb=progargs.postcodedb, geogcols=progargs.geogcols, print_sql_only=progargs.print, drop_not_create=progargs.drop_danger_drop, ) if progargs.drop_danger_drop: # Drop views (and view-induced table indexes) first drop_pcmis_views(engine, metadata, configoptions, ddhint) if not progargs.debug_skiptables: process_all_tables(engine, metadata, configoptions) else: # Tables first, then views if not progargs.debug_skiptables: process_all_tables(engine, metadata, configoptions) create_pcmis_views(engine, metadata, configoptions, ddhint) if progargs.settings_filename: with open(progargs.settings_filename, 'w') as f: print(get_pcmis_dd_settings(ddhint), file=f)
def get_pcmis_dd_settings(ddhint: DDHint) -> str: """ Draft CRATE config file settings that will allow CRATE to create a PCMIS data dictionary near-automatically. Args: ddhint: :class:`crate_anon.preprocess.ddhint.DDHint` Returns: the config file settings, as a string """ return """ ddgen_omit_by_default = True ddgen_omit_fields = ddgen_include_fields = # # ------------------------------------------------------------------------- # PCMIS core tables # ------------------------------------------------------------------------- Lexicon*.* # system lookup tables Lookups.* # system lookup table lu*.* # system lookup tables System*.* # system lookup tables Users.* # staff # ------------------------------------------------------------------------- # Custom views from CRATE # ------------------------------------------------------------------------- Case*{CRATE_VIEW_SUFFIX}.* Group*{CRATE_VIEW_SUFFIX}.* Patient*{CRATE_VIEW_SUFFIX}.* ReferralDetails{CRATE_VIEW_SUFFIX}.* ddgen_allow_no_patient_info = False ddgen_per_table_pid_field = {PCMIS_COL_PATIENT_ID} ddgen_add_per_table_pids_to_scrubber = False ddgen_master_pid_fieldname = {PCMIS_COL_NHS_NUMBER} ddgen_table_denylist = # # ------------------------------------------------------------------------- # Denylist: Prefixes: groups of tables; individual tables # ------------------------------------------------------------------------- aspnet_* # admin tables CaseCarer* # details of carers CaseChild* # details of children CaseEmergency* # emergency contacts CaseEmployer* # employer details MissingData # system? ODBC_* # admin tables PatientCarer* # details of carers PatientDetails # replaced by {VIEW_PT_DETAIL_W_GEOG} PatientChild* # details of children PatientEmergency* # emergency contacts PatientEmployer* # employer details pcmis_* # admin tables # ------------------------------------------------------------------------- # Denylist: CPFT custom # ------------------------------------------------------------------------- # ------------------------------------------------------------------------- # Denylist: Views supersede # Below here, we have other tables suppressed because CRATE's views offer # more comprehensive alternatives # ------------------------------------------------------------------------- {suppress_tables} ddgen_table_allowlist = ddgen_table_require_field_absolute = # # All tables/fields must have crate_pk {CRATE_COL_PK} ddgen_table_require_field_conditional = ddgen_field_denylist = ddgen_field_allowlist = ddgen_pk_fields = {CRATE_COL_PK} ddgen_constant_content = False ddgen_constant_content_tables = ddgen_nonconstant_content_tables = ddgen_addition_only = False ddgen_addition_only_tables = ddgen_deletion_possible_tables = ddgen_pid_defining_fieldnames = {VIEW_PT_DETAIL_W_GEOG}.{PCMIS_COL_PATIENT_ID} ddgen_scrubsrc_patient_fields = # several of these: # ---------------------------------------------------------------------- # Original PCMIS tables (some may be superseded by views; list both here; # if the table is denylisted anyway, it doesn't matter). # We achieve "list both" by using *. # ---------------------------------------------------------------------- CaseContactDetails*.CaseNumber CaseContactDetails*.FirstName CaseContactDetails*.MiddleName CaseContactDetails*.LastName CaseContactDetails*.DOB CaseContactDetails*.Address* CaseContactDetails*.TownCity CaseContactDetails*.County CaseContactDetails*.PostCode CaseContactDetails*.Tel* CaseContactDetails*.NHSNumber CaseContactDetails*.FamilyName CaseContactDetails*.PreviousName CaseContactDetails*.PreviousAddress* CaseContactDetails*.PreviousTownCity CaseContactDetails*.PreviousCounty CaseContactDetails*.PreviousPostCode CaseContactDetails*.Email CaseContactDetails*.Profession CaseContactDetails*.OtherCaseNumber CaseContactDetails*.NHSNumberVerified CaseContactDetails*.Voicemail* CaseContactDetails*.LastNameAlias CaseContactDetails*.FirstNameAlias CaseContactDetails*.DisplayName CaseEpisodes*.LinkedCaseNumber PatientDetails*.PatientID PatientDetails*.FirstName PatientDetails*.MiddleName PatientDetails*.LastName PatientDetails*.DOB PatientDetails*.Address* # Address1, Address2, Address3 PatientDetails*.TownCity PatientDetails*.County PatientDetails*.PostCode PatientDetails*.Tel* # TelHome, TelMobile, TelWork PatientDetails*.NHSNumber PatientDetails*.FamilyName PatientDetails*.PreviousName PatientDetails*.PreviousAddress* PatientDetails*.PreviousTownCity PatientDetails*.PreviousCounty PatientDetails*.PreviousPostCode PatientDetails*.Email PatientDetails*.NHSNumberVerified PatientDetails*.Voicemail* PatientDetails*.LastNameAlias PatientDetails*.FirstNameAlias PatientDetails*.DisplayName # ---------------------------------------------------------------------- # Views # ---------------------------------------------------------------------- ddgen_scrubsrc_thirdparty_fields = # several: # ---------------------------------------------------------------------- # Original PCMIS tables (some may be superseded by views; list both here) # ---------------------------------------------------------------------- CaseContactDetails*.DependantChildren # is VARCHAR(100) CaseContactDetails*.ChildDetails* CaseContactDetails*.CarerDetails* CaseCarerDetails*.CarerName CaseCarerDetails*.CarerTel* CaseCarerDetails*.CarerAddress* CaseCarerDetails*.CarerTownCity CaseCarerDetails*.CarerCounty CaseCarerDetails*.CarerPostcode CaseChildDetails*.ChildCarer # NVARCHAR(50) CaseChildDetails*.FirstName CaseChildDetails*.MiddleName CaseChildDetails*.LastName CaseChildDetails*.DOB CaseEmergencyDetails*.NextOfKin CaseEmergencyDetails*.EmergencyContact CaseEmergencyDetails*.EmergencyAddress* CaseEmergencyDetails*.EmergencyTownCity CaseEmergencyDetails*.EmergencyCounty CaseEmergencyDetails*.EmergencyPostcode CaseEmergencyDetails*.EmergencyTelephone CaseEmployerDetails*.EmployerName CaseEmployerDetails*.EmployerJobTitle CaseEmployerDetails*.EmployerContact CaseEmployerDetails*.EmployerAddress* CaseEmployerDetails*.EmployerTownCity CaseEmployerDetails*.EmployerCounty CaseEmployerDetails*.EmployerPostcode CaseEmployerDetails*.EmployerTelephone PatientCarerDetails*.CarerName PatientCarerDetails*.CarerTel* PatientCarerDetails*.CarerAddress* PatientCarerDetails*.CarerTownCity PatientCarerDetails*.CarerCounty PatientCarerDetails*.CarerPostCode PatientChildDetails*.ChildCarer # VARCHAR(50) PatientChildDetails*.FirstName PatientChildDetails*.MiddleName PatientChildDetails*.LastName PatientChildDetails*.DOB PatientDetails*.DependantChildren # is VARCHAR(100) PatientEmergencyDetails*.NextOfKin PatientEmergencyDetails*.EmergencyContact PatientEmergencyDetails*.EmergencyAddress* PatientEmergencyDetails*.EmergencyTownCity PatientEmergencyDetails*.EmergencyCounty PatientEmergencyDetails*.EmergencyPostcode PatientEmergencyDetails*.EmergencyTelephone PatientEmployerDetails*.EmployerName PatientEmployerDetails*.EmployerJobTitle PatientEmployerDetails*.EmployerContact PatientEmployerDetails*.EmployerAddress* PatientEmployerDetails*.EmployerTownCity PatientEmployerDetails*.EmployerCounty PatientEmployerDetails*.EmployerPostcode PatientEmployerDetails*.EmployerTelephone # ---------------------------------------------------------------------- # CRATE views # ---------------------------------------------------------------------- ddgen_scrubsrc_thirdparty_xref_pid_fields = ddgen_required_scrubsrc_fields = # several: PatientDetails{CRATE_VIEW_SUFFIX}.FirstName PatientDetails{CRATE_VIEW_SUFFIX}.LastName # always present, but FamilyName can be NULL PatientDetails{CRATE_VIEW_SUFFIX}.DOB ddgen_scrubmethod_code_fields = # note: case-insensitive matching: *PostCode ddgen_scrubmethod_date_fields = *DOB* ddgen_scrubmethod_number_fields = # *Tel* *Voicemail* *NHSNumber* ddgen_scrubmethod_phrase_fields = *Address* ddgen_safe_fields_exempt_from_scrubbing = # PCMIS mostly uses string column lengths of 1, 20, 32, 50, 64, 100, 128, # 200, 250, 255, 256, 500, 1000, 2000, 4000, unlimited. # So what length is the minimum for "free text"? # - 20: mostly postcodes, lookup codes # - 32: telephone numbers # - 50: includes CaseAssessmentContactType.Purpose, plus lookup codes. # Also includes CaseChildDetails.Impact # - 64: mostly codes; also e.g. ReferralDetails.EndOfCareReason # - 100: lots of generic things, like CaseAssessmentCustom1.Q1 ddgen_min_length_for_scrubbing = 50 ddgen_truncate_date_fields = CaseContactDetails.DOB PatientDetails.DOB ddgen_filename_to_text_fields = ddgen_binary_to_text_field_pairs = ddgen_skip_row_if_extract_text_fails_fields = ddgen_rename_tables_remove_suffixes = {CRATE_VIEW_SUFFIX} ddgen_patient_opt_out_fields = ddgen_extra_hash_fields = CaseNumber, pcmis_case_number_hashdef # YOU WILL NEED TO DO THIS: # (1) add "pcmis_case_number_hashdef" to your "extra_hash_config_sections" # setting; # (2) add a "pcmis_case_number_hashdef" section, like this: # [pcmis_case_number_hashdef] # hash_method = HMAC_MD5 # secret_key = my_special_secret_phrase_123 # and obviously you should use your own secret phrase, not this one! ddgen_index_fields = {PCMIS_COL_CASE_NUMBER} {PCMIS_COL_CONTACT_NUMBER} GroupCode ddgen_allow_fulltext_indexing = True ddgen_force_lower_case = False ddgen_convert_odd_chars_to_underscore = True """.format( # noqa CRATE_COL_PK=CRATE_COL_PK, CRATE_VIEW_SUFFIX=CRATE_VIEW_SUFFIX, PCMIS_COL_CASE_NUMBER=PCMIS_COL_CASE_NUMBER, PCMIS_COL_CONTACT_NUMBER=PCMIS_COL_CONTACT_NUMBER, PCMIS_COL_NHS_NUMBER=PCMIS_COL_NHS_NUMBER, PCMIS_COL_PATIENT_ID=PCMIS_COL_PATIENT_ID, PCMIS_TABLE_MASTER_PATIENT=PCMIS_TABLE_MASTER_PATIENT, suppress_tables="\n ".join(ddhint.get_suppressed_tables()), VIEW_PT_DETAIL_W_GEOG=VIEW_PT_DETAIL_W_GEOG, )
def drop_pcmis_views(engine: Engine, metadata: MetaData, progargs: Any, ddhint: DDHint) -> None: # ddhint modified views = get_pcmis_views(engine, progargs, ddhint) ddhint.drop_indexes(engine, metadata) for viewmaker in views: viewmaker.drop_view(engine)
def create_pcmis_views(engine: Engine, metadata: MetaData, progargs: Any, ddhint: DDHint) -> None: # ddhint modified views = get_pcmis_views(engine, progargs, ddhint) for viewmaker in views: viewmaker.create_view(engine) ddhint.add_indexes(engine, metadata)
def main() -> None: parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, # formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=r""" * Alters a RiO database to be suitable for CRATE. * By default, this treats the source database as being a copy of a RiO database (slightly later than version 6.2; exact version unclear). Use the "--rcep" (+/- "--cpft") switch(es) to treat it as a Servelec RiO CRIS Extract Program (RCEP) v2 output database. """) # noqa parser.add_argument("--url", required=True, help="SQLAlchemy database URL") parser.add_argument("-v", "--verbose", action="store_true", help="Verbose") parser.add_argument( "--print", action="store_true", help="Print SQL but do not execute it. (You can redirect the printed " "output to create an SQL script.") parser.add_argument("--echo", action="store_true", help="Echo SQL") parser.add_argument( "--rcep", action="store_true", help="Treat the source database as the product of Servelec's RiO CRIS " "Extract Program v2 (instead of raw RiO)") parser.add_argument( "--drop-danger-drop", action="store_true", help="REMOVES new columns and indexes, rather than creating them. " "(There's not very much danger; no real information is lost, but " "it might take a while to recalculate it.)") parser.add_argument( "--cpft", action="store_true", help="Apply hacks for Cambridgeshire & Peterborough NHS Foundation " "Trust (CPFT) RCEP database. Only appicable with --rcep") parser.add_argument( "--debug-skiptables", action="store_true", help="DEBUG-ONLY OPTION. Skip tables (view creation only)") prog_curr_group = parser.add_mutually_exclusive_group() prog_curr_group.add_argument( "--prognotes-current-only", dest="prognotes_current_only", action="store_true", help="Progress_Notes view restricted to current versions only " "(* default)") prog_curr_group.add_argument( "--prognotes-all", dest="prognotes_current_only", action="store_false", help="Progress_Notes view shows old versions too") parser.set_defaults(prognotes_current_only=True) clindocs_curr_group = parser.add_mutually_exclusive_group() clindocs_curr_group.add_argument( "--clindocs-current-only", dest="clindocs_current_only", action="store_true", help="Clinical_Documents view restricted to current versions only (*)") clindocs_curr_group.add_argument( "--clindocs-all", dest="clindocs_current_only", action="store_false", help="Clinical_Documents view shows old versions too") parser.set_defaults(clindocs_current_only=True) allerg_curr_group = parser.add_mutually_exclusive_group() allerg_curr_group.add_argument( "--allergies-current-only", dest="allergies_current_only", action="store_true", help="Client_Allergies view restricted to current info only") allerg_curr_group.add_argument( "--allergies-all", dest="allergies_current_only", action="store_false", help="Client_Allergies view shows deleted allergies too (*)") parser.set_defaults(allergies_current_only=False) audit_group = parser.add_mutually_exclusive_group() audit_group.add_argument( "--audit-info", dest="audit_info", action="store_true", help="Audit information (creation/update times) added to views") audit_group.add_argument("--no-audit-info", dest="audit_info", action="store_false", help="No audit information added (*)") parser.set_defaults(audit_info=False) parser.add_argument( "--postcodedb", help='Specify database (schema) name for ONS Postcode Database (as ' 'imported by CRATE) to link to addresses as a view. With SQL ' 'Server, you will have to specify the schema as well as the ' 'database; e.g. "--postcodedb ONS_PD.dbo"') parser.add_argument( "--geogcols", nargs="*", default=DEFAULT_GEOG_COLS, help="List of geographical information columns to link in from ONS " "Postcode Database. BEWARE that you do not specify anything too " "identifying. Default: {}".format(' '.join(DEFAULT_GEOG_COLS))) parser.add_argument( "--settings-filename", help="Specify filename to write draft ddgen_* settings to, for use in " "a CRATE anonymiser configuration file.") progargs = parser.parse_args() rootlogger = logging.getLogger() configure_logger_for_colour( rootlogger, level=logging.DEBUG if progargs.verbose else logging.INFO) progargs.rio = not progargs.rcep if progargs.rcep: # RCEP progargs.master_patient_table = RCEP_TABLE_MASTER_PATIENT if progargs.cpft: progargs.full_prognotes_table = CPFT_RCEP_TABLE_FULL_PROGRESS_NOTES # We (CPFT) may have a hacked-in copy of the RiO main progress # notes table added to the RCEP output database. else: progargs.full_prognotes_table = None # The RCEP does not export sufficient information to distinguish # current and non-current versions of progress notes. else: # RiO progargs.master_patient_table = RIO_TABLE_MASTER_PATIENT progargs.full_prognotes_table = RIO_TABLE_PROGRESS_NOTES log.info("CRATE in-place preprocessor for RiO or RiO CRIS Extract Program " "(RCEP) databases") safeargs = {k: v for k, v in vars(progargs).items() if k != 'url'} log.debug("args (except url): {}".format(repr(safeargs))) log.info("RiO mode" if progargs.rio else "RCEP mode") if progargs.postcodedb and not progargs.geogcols: raise ValueError( "If you specify postcodedb, you must specify some geogcols") set_print_not_execute(progargs.print) hack_in_mssql_xml_type() engine = create_engine(progargs.url, echo=progargs.echo, encoding=CHARSET) metadata = MetaData() metadata.bind = engine log.info("Database: {}".format(repr(engine.url))) # ... repr hides p/w log.debug("Dialect: {}".format(engine.dialect.name)) log.info("Reflecting (inspecting) database...") metadata.reflect(engine) log.info("... inspection complete") ddhint = DDHint() if progargs.drop_danger_drop: # Drop views (and view-induced table indexes) first if progargs.rio: drop_rio_views(engine, metadata, progargs, ddhint) drop_view(engine, VIEW_ADDRESS_WITH_GEOGRAPHY) if not progargs.debug_skiptables: process_all_tables(engine, metadata, progargs) else: # Tables first, then views if not progargs.debug_skiptables: process_all_tables(engine, metadata, progargs) if progargs.postcodedb: add_postcode_geography_view(engine, progargs, ddhint) if progargs.rio: create_rio_views(engine, metadata, progargs, ddhint) if progargs.settings_filename: with open(progargs.settings_filename, 'w') as f: print(get_rio_dd_settings(ddhint), file=f)
def add_postcode_geography_view(engine: Engine, progargs: Any, ddhint: DDHint) -> None: # ddhint modified # Re-read column names, as we may have inserted some recently by hand that # may not be in the initial metadata. if progargs.rio: addresstable = RIO_TABLE_ADDRESS rio_postcodecol = RIO_COL_POSTCODE else: addresstable = RCEP_TABLE_ADDRESS rio_postcodecol = RCEP_COL_POSTCODE orig_column_names = get_column_names(engine, tablename=addresstable, sort=True) # Remove any original column names being overridden by new ones. # (Could also do this the other way around!) geogcols_lowercase = [x.lower() for x in progargs.geogcols] orig_column_names = [ x for x in orig_column_names if x.lower() not in geogcols_lowercase ] orig_column_specs = [ "{t}.{c}".format(t=addresstable, c=col) for col in orig_column_names ] geog_col_specs = [ "{db}.{t}.{c}".format(db=progargs.postcodedb, t=ONSPD_TABLE_POSTCODE, c=col) for col in sorted(progargs.geogcols, key=lambda x: x.lower()) ] overlap = set(orig_column_names) & set(progargs.geogcols) if overlap: raise ValueError("Columns overlap: address table contains columns {}; " "geogcols = {}; overlap = {}".format( orig_column_names, progargs.geogcols, overlap)) ensure_columns_present(engine, tablename=addresstable, column_names=[rio_postcodecol]) select_sql = """ SELECT {origcols}, {geogcols} FROM {addresstable} LEFT JOIN {pdb}.{pcdtab} ON {addresstable}.{rio_postcodecol} = {pdb}.{pcdtab}.pcds -- RCEP, and presumably RiO, appear to use the ONS pcds format, of -- 2-4 char outward code; space; 3-char inward code. -- If this fails, use this slower version: -- ON REPLACE({addresstable}.{rio_postcodecol}, -- ' ', -- '') = {pdb}.{pcdtab}.pcd_nospace """.format( addresstable=addresstable, origcols=",\n ".join(orig_column_specs), geogcols=",\n ".join(geog_col_specs), pdb=progargs.postcodedb, pcdtab=ONSPD_TABLE_POSTCODE, rio_postcodecol=rio_postcodecol, ) create_view(engine, VIEW_ADDRESS_WITH_GEOGRAPHY, select_sql) assert_view_has_same_num_rows(engine, addresstable, VIEW_ADDRESS_WITH_GEOGRAPHY) ddhint.suppress_table(addresstable)