def _execute_sql(self, cursor: cx_Oracle.Cursor, statement: str,
                 params: Dict[str, Any]) -> cx_Oracle.Cursor:
    """
    Execute SQL query on Oracle DB using active connection.

    *Args*:\n
        _cursor_: cursor object.\n
        _statement_: SQL query to be executed.\n
        _params_: SQL query parameters.\n
    *Returns:*\n
        Query results.
    """
    statement_with_params = self._replace_parameters_in_statement(statement,
                                                                  params)
    _connection_info = '@'.join((cursor.connection.username,
                                 cursor.connection.dsn))
    data = self.wrap_into_html_details(
        statement=statement_with_params,
        summary=f'Executed PL/SQL statement on {_connection_info}')
    logger.info(data, html=True)
    cursor.prepare(statement)
    self.last_executed_statement = statement_with_params
    self.last_used_connection_index = self._cache.current_index
    cursor.execute(None, params)
    return cursor
def get_swissprot2enzyme(cur: cx_Oracle.Cursor) -> Dict[str, List[str]]:
    cur.execute(
        """
        SELECT DISTINCT E.ACCESSION, D.DESCR
        FROM SPTR.DBENTRY@SWPREAD E
        INNER JOIN SPTR.DBENTRY_2_DESC@SWPREAD D
            ON E.DBENTRY_ID = D.DBENTRY_ID
        INNER JOIN SPTR.CV_DESC@SWPREAD C
            ON D.DESC_ID = C.DESC_ID
        WHERE E.ENTRY_TYPE = 0            -- Swiss-Prot
          AND E.MERGE_STATUS != 'R'       -- not 'Redundant'
          AND E.DELETED = 'N'             -- not deleted
          AND E.FIRST_PUBLIC IS NOT NULL  -- published
          AND C.SUBCATG_TYPE = 'EC'
        """
    )

    # Accepts X.X.X.X or X.X.X.-
    # Does not accept preliminary EC numbers (e.g. X.X.X.nX)
    prog = re.compile(r"(\d+\.){3}(\d+|-)$")

    proteins = {}
    for acc, ecno in cur:
        if prog.match(ecno):
            try:
                proteins[acc].append(ecno)
            except KeyError:
                proteins[acc] = [ecno]

    return proteins
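# A minimal, self-contained illustration (not from the original module) of what
# the EC-number regex above accepts and rejects.
import re

prog = re.compile(r"(\d+\.){3}(\d+|-)$")
assert prog.match("1.2.3.4")        # complete EC number
assert prog.match("1.2.3.-")        # last level unspecified
assert not prog.match("1.2.3.n1")   # preliminary EC number: rejected
assert not prog.match("1.2.3")      # incomplete: rejected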
def _get_citations(cur: cx_Oracle.Cursor) -> dict:
    citations = {}
    cur.execute(
        """
        SELECT PUB_ID, PUBMED_ID, ISBN, VOLUME, ISSUE, YEAR, TITLE, URL,
               RAWPAGES, MEDLINE_JOURNAL, ISO_JOURNAL, AUTHORS, DOI_URL
        FROM INTERPRO.CITATION
        """
    )

    for row in cur:
        authors = []
        if row[11]:
            for name in row[11].split(','):
                authors.append(name.strip())

        citations[row[0]] = {
            "PMID": row[1],
            "ISBN": row[2],
            "volume": row[3],
            "issue": row[4],
            "year": row[5],
            "title": row[6],
            "URL": row[7],
            "raw_pages": row[8],
            "medline_journal": row[9],
            "ISO_journal": row[10],
            "authors": authors,
            "DOI_URL": row[12]
        }

    return citations
def _export_interpro2go2uniprot(cur: cx_Oracle.Cursor, output: str):
    cur.execute(
        """
        SELECT DISTINCT IG.ENTRY_AC, IG.GO_ID, M.PROTEIN_AC
        FROM INTERPRO.MATCH M
        INNER JOIN INTERPRO.ENTRY2METHOD EM
            ON M.METHOD_AC = EM.METHOD_AC
        INNER JOIN INTERPRO.ENTRY E
            ON EM.ENTRY_AC = E.ENTRY_AC
        INNER JOIN INTERPRO.INTERPRO2GO IG
            ON E.ENTRY_AC = IG.ENTRY_AC
        WHERE E.CHECKED = 'Y'
        """
    )

    tmp_path = f"{output}.tmp"
    with open(tmp_path, "wt") as fh:
        fh.write("#InterPro accession\tGO ID\tUniProt accession\n")

        for row in cur:
            fh.write('\t'.join(row) + '\n')

    # Replace the previous file (if any) with the new one
    try:
        os.remove(output)
    except FileNotFoundError:
        pass

    os.rename(tmp_path, output)
    os.chmod(output, 0o775)
def check_proteins(cur: cx_Oracle.Cursor) -> int:
    num_errors = 0
    cur.execute(
        """
        SELECT PROTEIN_AC
        FROM INTERPRO.PROTEIN
        MINUS
        SELECT AC
        FROM UNIPARC.XREF
        WHERE DBID IN (2, 3)  -- Swiss-Prot/TrEMBL
          AND DELETED = 'N'   -- Not deleted
        """
    )

    for accession, in cur:
        logger.error(f"missing: {accession}")
        num_errors += 1

    cur.execute(
        """
        SELECT IP.PROTEIN_AC, IP.CRC64, UP.CRC64
        FROM INTERPRO.PROTEIN IP
        INNER JOIN UNIPARC.XREF UX
            ON IP.PROTEIN_AC = UX.AC
            AND UX.DBID IN (2, 3)  -- Swiss-Prot/TrEMBL
            AND UX.DELETED = 'N'   -- Not deleted
        INNER JOIN UNIPARC.PROTEIN UP
            ON UX.UPI = UP.UPI
        WHERE IP.CRC64 != UP.CRC64
        """
    )

    for accession, crc64_1, crc64_2 in cur:
        logger.error(f"CRC64 mismatch: {accession}: {crc64_1} / {crc64_2}")
        num_errors += 1

    return num_errors
def is_ready(self, cur: Cursor, max_upi: str) -> bool:
    cur.execute(
        """
        SELECT SUM(CNT)
        FROM (
            SELECT COUNT(*) AS CNT
            FROM IPRSCAN.IPM_RUNNING_JOBS@ISPRO
            WHERE ANALYSIS_ID = :analysisid
              AND JOB_START <= :maxupi
            UNION ALL
            SELECT COUNT(*) AS CNT
            FROM IPRSCAN.IPM_COMPLETED_JOBS@ISPRO
            WHERE ANALYSIS_ID = :analysisid
              AND JOB_START <= :maxupi
              AND PERSISTED < :persisted
            UNION ALL
            SELECT COUNT(*) AS CNT
            FROM IPRSCAN.IPM_PERSISTED_JOBS@ISPRO
            WHERE ANALYSIS_ID = :analysisid
              AND JOB_START <= :maxupi
              AND PERSISTED < :persisted
        )
        """,
        dict(analysisid=self.id, persisted=self.persisted, maxupi=max_upi)
    )
    if cur.fetchone()[0]:
        # Jobs are still running, or completed but not yet persisted
        return False

    cur.execute(
        """
        SELECT MAX(JOB_END)
        FROM IPRSCAN.IPM_PERSISTED_JOBS@ISPRO
        WHERE ANALYSIS_ID = :1
          AND PERSISTED >= :2
        """,
        (self.id, self.persisted)
    )
    row = cur.fetchone()
    # MAX(JOB_END) is NULL when nothing has been persisted for this analysis
    return row is not None and row[0] is not None and row[0] >= max_upi
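# A hedged usage sketch (the helper name is hypothetical, not part of the
# original code): polling is_ready() until all jobs for an analysis have been
# persisted in ISPRO.
import time

def wait_until_ready(analysis, cur, max_upi: str, interval: int = 600):
    # `analysis` is assumed to be an object exposing the is_ready() method above
    while not analysis.is_ready(cur, max_upi):
        time.sleep(interval)  # the 10-minute default is illustrative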
def get_subpartitions(cur: Cursor, schema: str, table: str,
                      partition: str) -> List[dict]:
    cur.execute(
        """
        SELECT SP.SUBPARTITION_NAME, SP.SUBPARTITION_POSITION, SP.HIGH_VALUE,
               K.COLUMN_NAME, K.COLUMN_POSITION
        FROM ALL_TAB_SUBPARTITIONS SP
        INNER JOIN ALL_SUBPART_KEY_COLUMNS K
            ON SP.TABLE_OWNER = K.OWNER
            AND SP.TABLE_NAME = K.NAME
        WHERE SP.TABLE_OWNER = :1
          AND SP.TABLE_NAME = :2
          AND SP.PARTITION_NAME = :3
        """,
        (schema, table, partition)
    )

    subpartitions = {}
    for row in cur:
        part_name = row[0]
        if part_name in subpartitions:
            # A second key column was returned for the same subpartition
            raise ValueError("Multi-column partitioning keys not supported")

        subpartitions[part_name] = {
            "name": part_name,
            "position": row[1],
            "value": row[2],
            "column": row[3]
        }

    return sorted(subpartitions.values(), key=lambda x: x["position"])
def _get_entries_protein_counts(
        cur: cx_Oracle.Cursor) -> Dict[str, Dict[str, int]]:
    """
    Return the number of proteins matched by each InterPro entry.
    Only complete sequences are considered.

    :param cur: Oracle cursor object
    :return: dictionary
    """
    taxon2superkingdom = _get_taxon2superkingdom(cur)
    cur.execute(
        """
        SELECT EM.ENTRY_AC, P.TAX_ID, COUNT(DISTINCT P.PROTEIN_AC)
        FROM INTERPRO.PROTEIN P
        INNER JOIN INTERPRO.MATCH M
            ON P.PROTEIN_AC = M.PROTEIN_AC
        INNER JOIN INTERPRO.ENTRY2METHOD EM
            ON EM.METHOD_AC = M.METHOD_AC
        WHERE P.FRAGMENT = 'N'
        GROUP BY EM.ENTRY_AC, P.TAX_ID
        """
    )

    counts = {}
    for entry_acc, tax_id, n_proteins in cur:
        try:
            e = counts[entry_acc]
        except KeyError:
            e = counts[entry_acc] = {}

        superkingdom = taxon2superkingdom[tax_id]
        try:
            e[superkingdom] += n_proteins
        except KeyError:
            e[superkingdom] = n_proteins

    return counts
def _get_taxon2superkingdom(cur: cx_Oracle.Cursor) -> Dict[int, str]:
    # Load all taxa
    cur.execute(
        """
        SELECT TAX_ID, SCIENTIFIC_NAME, RANK, PARENT_ID
        FROM INTERPRO.ETAXI
        """
    )

    taxa = {}
    for tax_id, name, rank, parent_id in cur:
        if tax_id in (1, 131567):
            # Skip root and meta-superkingdom (131567), which contains:
            #   * Bacteria (2)
            #   * Archaea (2157)
            #   * Eukaryota (2759)
            continue
        elif parent_id in (1, 131567):
            # Direct child of the root: treat as a superkingdom
            rank = "superkingdom"
            parent_id = None

        taxa[tax_id] = (name, rank, parent_id)

    # For each taxon, find its root (i.e. superkingdom)
    taxon2superkingdom = {}
    for tax_id in taxa:
        name, rank, parent_id = taxa[tax_id]

        while parent_id is not None:
            name, rank, parent_id = taxa[parent_id]
            if rank == "superkingdom":
                taxon2superkingdom[tax_id] = name
                break

    return taxon2superkingdom
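# A self-contained illustration of the parent-chain walk above, using made-up
# taxonomy rows in place of INTERPRO.ETAXI.
taxa = {
    2: ("Bacteria", "superkingdom", None),
    1224: ("Proteobacteria", "phylum", 2),
    543: ("Enterobacteriaceae", "family", 1224),
}

name, rank, parent_id = taxa[543]
while parent_id is not None:
    name, rank, parent_id = taxa[parent_id]
    if rank == "superkingdom":
        break

print(name)  # Bacteria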
def get_indexes(cur: Cursor, owner: str, name: str) -> List[dict]:
    cur.execute(
        """
        SELECT I.OWNER, I.INDEX_NAME, I.UNIQUENESS, I.TABLESPACE_NAME,
               I.LOGGING, I.STATUS, IC.COLUMN_NAME, IC.DESCEND
        FROM ALL_INDEXES I
        INNER JOIN ALL_IND_COLUMNS IC
            ON I.OWNER = IC.INDEX_OWNER
            AND I.INDEX_NAME = IC.INDEX_NAME
            AND I.TABLE_NAME = IC.TABLE_NAME
        WHERE I.TABLE_OWNER = :1
          AND I.TABLE_NAME = :2
        ORDER BY I.INDEX_NAME, IC.COLUMN_POSITION
        """,
        (owner, name)
    )

    indexes = {}
    for row in cur:
        name = row[1]
        try:
            index = indexes[name]
        except KeyError:
            index = indexes[name] = {
                "owner": row[0],
                "name": name,
                "is_unique": row[2] == "UNIQUE",
                "tablespace": row[3],
                "logging": row[4] == "YES",
                "unusable": row[5] == "UNUSABLE",  # can also be VALID or N/A
                "columns": []
            }

        index["columns"].append({"name": row[6], "order": row[7]})

    return list(indexes.values())
def update_databases(cur: cx_Oracle.Cursor):
    logger.info("creating table CV_DATABASE")
    oracle.drop_table(cur, "UNIPARC.CV_DATABASE", purge=True)
    cur.execute(
        """
        CREATE TABLE UNIPARC.CV_DATABASE
        TABLESPACE UNIPARC_TAB
        NOLOGGING
        AS
        SELECT *
        FROM UNIPARC.CV_DATABASE@UAREAD
        """
    )
    cur.execute("GRANT SELECT ON UNIPARC.CV_DATABASE TO PUBLIC")
    cur.execute(
        """
        CREATE UNIQUE INDEX PK_CV_DATABASE
        ON UNIPARC.CV_DATABASE (ID)
        TABLESPACE UNIPARC_IND
        NOLOGGING
        """
    )
    cur.execute(
        """
        CREATE UNIQUE INDEX UQ_CV_DATABASE$DESCR
        ON UNIPARC.CV_DATABASE (DESCR)
        TABLESPACE UNIPARC_IND
        NOLOGGING
        """
    )
    cur.execute(
        """
        ALTER TABLE UNIPARC.CV_DATABASE
        ADD (
            CONSTRAINT PK_CV_DATABASE PRIMARY KEY (ID)
                USING INDEX PK_CV_DATABASE,
            CONSTRAINT UQ_CV_DATABASE$DESCR UNIQUE (DESCR)
                USING INDEX UQ_CV_DATABASE$DESCR
        )
        """
    )
    oracle.gather_stats(cur, "UNIPARC", "CV_DATABASE")
def get_features(cur: cx_Oracle.Cursor) -> dict:
    cur.execute(
        """
        SELECT M.METHOD_AC, M.NAME, D.DBSHORT, V.VERSION, EVI.ABBREV
        FROM INTERPRO.FEATURE_METHOD M
        INNER JOIN INTERPRO.CV_DATABASE D
            ON M.DBCODE = D.DBCODE
        INNER JOIN INTERPRO.DB_VERSION V
            ON D.DBCODE = V.DBCODE
        INNER JOIN INTERPRO.IPRSCAN2DBCODE I2D
            ON D.DBCODE = I2D.DBCODE
        INNER JOIN INTERPRO.CV_EVIDENCE EVI
            ON I2D.EVIDENCE = EVI.CODE
        """
    )

    features = {}
    for row in cur:
        features[row[0]] = {
            "accession": row[0],
            "name": row[1],
            "database": row[2],
            "version": row[3],
            "evidence": row[4]
        }

    return features
def truncate_table(cur: Cursor, name: str, reuse_storage: bool = False):
    if reuse_storage:
        sql = f"TRUNCATE TABLE {name} REUSE STORAGE"
    else:
        sql = f"TRUNCATE TABLE {name}"

    cur.execute(sql)
def toggle_constraint(cur: Cursor, table: str, constraint: str, enable: bool):
    if enable:
        sql = f"ALTER TABLE {table} ENABLE CONSTRAINT {constraint}"
    else:
        sql = f"ALTER TABLE {table} DISABLE CONSTRAINT {constraint}"

    cur.execute(sql)
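# A hedged sketch combining the two helpers above for a typical bulk reload:
# disable a foreign key, truncate, reload, re-enable. The constraint name and
# the loader callback are placeholders, not taken from the original code.
def reload_table(cur, load_rows):
    toggle_constraint(cur, "INTERPRO.MATCH", "FK_MATCH$PROTEIN", enable=False)
    truncate_table(cur, "INTERPRO.MATCH", reuse_storage=True)
    load_rows(cur)  # caller-provided loading routine
    toggle_constraint(cur, "INTERPRO.MATCH", "FK_MATCH$PROTEIN", enable=True)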
def get_chain_taxonomy(cur: cx_Oracle.Cursor) -> dict:
    cur.execute(
        """
        SELECT DISTINCT ASYM.ENTRY_ID, ASYM.AUTH_ASYM_ID, SRC.TAX_ID
        FROM PDBE.STRUCT_ASYM@PDBE_LIVE ASYM
        INNER JOIN PDBE.ENTITY_SRC@PDBE_LIVE SRC
            ON ASYM.ENTRY_ID = SRC.ENTRY_ID
            AND ASYM.ENTITY_ID = SRC.ENTITY_ID
        """
    )

    structures = {}
    for pdb_id, chain, tax_id in cur:
        pdb_acc = pdb_id + '_' + chain

        if pdb_acc in structures:
            s = structures[pdb_acc]
        else:
            s = structures[pdb_acc] = {
                "id": pdb_id,
                "chain": chain,
                "taxa": set()
            }

        s["taxa"].add(tax_id)

    return structures
def get_sig_protein_counts(cur: cx_Oracle.Cursor,
                           dbid: str) -> Dict[str, Dict[str, int]]:
    """
    Return the number of proteins matched by each member database signature.
    Only complete sequences are considered.

    :param cur: Oracle cursor object
    :param dbid: member database identifier
    :return: dictionary
    """
    partition = MATCH_PARTITIONS[dbid]
    taxon2superkingdom = _get_taxon2superkingdom(cur)
    cur.execute(
        f"""
        SELECT M.METHOD_AC, P.TAX_ID, COUNT(DISTINCT P.PROTEIN_AC)
        FROM INTERPRO.MATCH PARTITION ({partition}) M
        INNER JOIN INTERPRO.PROTEIN P
            ON P.PROTEIN_AC = M.PROTEIN_AC
        WHERE P.FRAGMENT = 'N'
        GROUP BY M.METHOD_AC, P.TAX_ID
        """
    )

    counts = {}
    for sig_acc, tax_id, n_proteins in cur:
        try:
            sig = counts[sig_acc]
        except KeyError:
            sig = counts[sig_acc] = {}

        superkingdom = taxon2superkingdom[tax_id]
        try:
            sig[superkingdom] += n_proteins
        except KeyError:
            sig[superkingdom] = n_proteins

    return counts
def drop_mview(cur: Cursor, name: str):
    try:
        cur.execute(f"DROP MATERIALIZED VIEW {name}")
    except DatabaseError as exc:
        error, = exc.args

        # ORA-08103: object no longer exists
        # ORA-12003: materialized view does not exist
        if error.code not in (8103, 12003):
            raise exc
def drop_table(cur: Cursor, name: str, purge: bool = False):
    if purge:
        sql = f"DROP TABLE {name} PURGE"
    else:
        sql = f"DROP TABLE {name}"

    try:
        cur.execute(sql)
    except DatabaseError as exc:
        error, = exc.args

        # ORA-00942: table or view does not exist
        # ORA-08103: object no longer exists
        if error.code not in (942, 8103):
            raise exc
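# A minimal sketch of the drop-if-exists pattern these helpers enable, so a
# pipeline step can be re-run safely. The staging table name and the query are
# illustrative, not from the original code.
def recreate_staging_table(cur):
    drop_table(cur, "INTERPRO.MY_STAGING", purge=True)  # no error if absent
    cur.execute(
        """
        CREATE TABLE INTERPRO.MY_STAGING NOLOGGING
        AS
        SELECT PROTEIN_AC, CRC64
        FROM INTERPRO.PROTEIN
        WHERE FRAGMENT = 'N'
        """
    )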
def insertClob(tableName: str, clob, cur: cx_Oracle.Cursor):
    """
    Inserts no more than 4 GiB of data into the DB at a time.

    Args:
        clob: https://doc.ispirer.com/sqlways/Output/SQLWays-1-124.html --
            The CLOB data type stores character large objects. A CLOB can
            store up to 4 gigabytes of character data.
    """
    # `debug`, `maxBytesPerInsert` and `generateClob` are module-level globals
    if debug:
        print('running insertClob with maxBytesPerInsert: ', maxBytesPerInsert)

    if len(clob) > maxBytesPerInsert:
        # Replace an oversized payload with one capped at the maximum size
        clob = generateClob(maxBytesPerInsert)

    clobInsert = f'insert into {tableName} values (:clob)'
    cur.execute(clobInsert, {'clob': clob})

    if debug:
        print('returning from insertClob with maxBytesPerInsert: ',
              maxBytesPerInsert)
def _get_interpro_releases(cur: cx_Oracle.Cursor) -> tuple:
    cur.execute(
        """
        SELECT VERSION, FILE_DATE
        FROM INTERPRO.DB_VERSION_AUDIT
        WHERE DBCODE = 'I'
        ORDER BY FILE_DATE
        """
    )

    rel_numbers = []
    rel_dates = []
    for row in cur:
        rel_numbers.append(row[0])
        rel_dates.append(row[1])

    return rel_numbers, rel_dates
def iter_alignments(cur: cx_Oracle.Cursor):
    cur.execute(
        """
        SELECT QUERY_AC, TARGET_AC, EVALUE, DOMAINS
        FROM INTERPRO.CLAN_MATCH
        """
    )

    for query, target, evalue, clob in cur:
        # DOMAINS is a LOB object: need to call read()
        domains = []
        for start, end in json.loads(clob.read()):
            domains.append({
                "start": start,
                "end": end
            })

        yield query, target, evalue, domains
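# A hedged usage sketch (the helper name and output format are illustrative):
# streaming the generator above into a TSV file. Note that the DOMAINS CLOB has
# already been decoded into a list of dicts by iter_alignments().
import json

def dump_alignments(cur, path: str):
    with open(path, "wt") as fh:
        for query, target, evalue, domains in iter_alignments(cur):
            fh.write(f"{query}\t{target}\t{evalue}\t{json.dumps(domains)}\n")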
def _get_name_history(cur: cx_Oracle.Cursor) -> dict:
    release_numbers, release_dates = _get_interpro_releases(cur)
    cur.execute(
        """
        SELECT ENTRY_AC, TRIM(NAME) AS NAME, TIMESTAMP
        FROM INTERPRO.ENTRY_AUDIT
        WHERE NAME IS NOT NULL
        ORDER BY TIMESTAMP
        """
    )

    # Get all names assigned to each entry
    entries = {}
    for acc, name, timestamp in cur:
        try:
            entries[acc].append((name, timestamp))
        except KeyError:
            entries[acc] = [(name, timestamp)]

    for acc, names in entries.items():
        # Select the last name given to an entry before each release
        releases = {}
        for name, timestamp in names:
            i = bisect.bisect_left(release_dates, timestamp)
            rel = release_numbers[i - 1]

            if rel not in releases or timestamp > releases[rel]["time"]:
                releases[rel] = {
                    "name": name,
                    "time": timestamp
                }

        # Keep the order in which names were assigned, without duplicates
        names = []
        for rel in sorted(releases.values(), key=lambda x: x["time"]):
            last_name = rel["name"]

            if last_name not in names:
                names.append(last_name)

        entries[acc] = names

    return entries
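# A self-contained illustration (made-up releases and dates) of the bisect
# logic above: bisect_left() returns the index of the first release dated
# on/after the timestamp, so index i - 1 is the latest release preceding it.
import bisect
from datetime import datetime

release_numbers = ["70.0", "71.0", "72.0"]
release_dates = [
    datetime(2018, 1, 4),
    datetime(2018, 7, 5),
    datetime(2019, 1, 10),
]

timestamp = datetime(2018, 9, 1)  # a rename made between releases 71.0 and 72.0
i = bisect.bisect_left(release_dates, timestamp)
print(release_numbers[i - 1])  # 71.0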
def get_signatures(cur: cx_Oracle.Cursor) -> dict:
    cur.execute(
        """
        SELECT M.METHOD_AC, M.NAME, DB.DBSHORT, EVI.ABBREV,
               E2M.ENTRY_AC, E2M.NAME, E2M.ABBREV, E2M.PARENT_AC
        FROM INTERPRO.METHOD M
        INNER JOIN INTERPRO.CV_DATABASE DB
            ON M.DBCODE = DB.DBCODE
        INNER JOIN INTERPRO.IPRSCAN2DBCODE I2D
            ON M.DBCODE = I2D.DBCODE
        INNER JOIN INTERPRO.CV_EVIDENCE EVI
            ON I2D.EVIDENCE = EVI.CODE
        LEFT OUTER JOIN (
            SELECT E2M.METHOD_AC, E.ENTRY_AC, E.NAME, ET.ABBREV,
                   E2E.PARENT_AC
            FROM INTERPRO.ENTRY E
            INNER JOIN INTERPRO.ENTRY2METHOD E2M
                ON E.ENTRY_AC = E2M.ENTRY_AC
            INNER JOIN INTERPRO.CV_ENTRY_TYPE ET
                ON E.ENTRY_TYPE = ET.CODE
            LEFT OUTER JOIN INTERPRO.ENTRY2ENTRY E2E
                ON E.ENTRY_AC = E2E.ENTRY_AC
            WHERE E.CHECKED = 'Y'
        ) E2M ON M.METHOD_AC = E2M.METHOD_AC
        """
    )

    signatures = {}
    for row in cur:
        signatures[row[0]] = {
            "accession": row[0],
            "name": row[1] or row[0],
            "database": row[2],
            "evidence": row[3],
            "interpro": [
                ("id", row[4]),
                ("name", row[5]),
                ("type", row[6]),
                ("parent_id", row[7])
            ] if row[4] else None
        }

    return signatures
def get_child_tables(cur: Cursor, schema: str, name: str) -> List[tuple]:
    cur.execute(
        """
        SELECT TABLE_NAME, CONSTRAINT_NAME, COLUMN_NAME
        FROM ALL_CONS_COLUMNS
        WHERE OWNER = :schema
          AND CONSTRAINT_NAME IN (
              SELECT CONSTRAINT_NAME
              FROM ALL_CONSTRAINTS
              WHERE CONSTRAINT_TYPE = 'R'  -- Referential
                AND R_CONSTRAINT_NAME IN (
                    SELECT CONSTRAINT_NAME
                    FROM ALL_CONSTRAINTS
                    WHERE CONSTRAINT_TYPE IN ('P', 'U')  -- Primary/Unique
                      AND OWNER = :schema
                      AND TABLE_NAME = :name
                )
          )
        """,
        dict(schema=schema, name=name)
    )
    return cur.fetchall()
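# A hedged sketch chaining get_child_tables() with toggle_constraint() (defined
# earlier) to disable every foreign key referencing a parent table before a
# bulk operation. The helper name is hypothetical.
def disable_child_constraints(cur, schema: str, table: str):
    for child_table, constraint, _column in get_child_tables(cur, schema, table):
        toggle_constraint(cur, f"{schema}.{child_table}", constraint,
                          enable=False)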
def get_swissprot2reactome(cur: cx_Oracle.Cursor) -> Dict[str, List[tuple]]:
    cur.execute(
        """
        SELECT DISTINCT E.ACCESSION, D.PRIMARY_ID, D.SECONDARY_ID
        FROM SPTR.DBENTRY@SWPREAD E
        INNER JOIN SPTR.DBENTRY_2_DATABASE@SWPREAD D
            ON E.DBENTRY_ID = D.DBENTRY_ID
            AND D.DATABASE_ID = 'GK'      -- Reactome
        WHERE E.ENTRY_TYPE = 0            -- Swiss-Prot
          AND E.MERGE_STATUS != 'R'       -- not 'Redundant'
          AND E.DELETED = 'N'             -- not deleted
          AND E.FIRST_PUBLIC IS NOT NULL  -- published
        """
    )

    proteins = {}
    for uniprot_acc, pathway_id, pathway_name in cur:
        try:
            proteins[uniprot_acc].append((pathway_id, pathway_name))
        except KeyError:
            proteins[uniprot_acc] = [(pathway_id, pathway_name)]

    return proteins
def rebuild_index(cur: Cursor, name: str, parallel: bool = False):
    if parallel:
        cur.execute(f"ALTER INDEX {name} REBUILD PARALLEL")

        # Prevent the index from being accessed in parallel by default
        cur.execute(f"ALTER INDEX {name} NOPARALLEL")
    else:
        cur.execute(f"ALTER INDEX {name} REBUILD")
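# A hedged sketch tying rebuild_index() to get_indexes() above: after a
# partition exchange or direct-path load, rebuild any index flagged UNUSABLE.
# The helper name is hypothetical.
def rebuild_unusable_indexes(cur, owner: str, table: str):
    for index in get_indexes(cur, owner, table):
        if index["unusable"]:
            rebuild_index(cur, f"{index['owner']}.{index['name']}",
                          parallel=True)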
def get_clans(cur: cx_Oracle.Cursor) -> Dict[str, dict]:
    cur.execute(
        """
        SELECT C.CLAN_AC, C.NAME, C.DESCRIPTION, LOWER(D.DBSHORT),
               M.MEMBER_AC, M.LEN, M.SCORE
        FROM INTERPRO.CLAN C
        INNER JOIN INTERPRO.CV_DATABASE D
            ON C.DBCODE = D.DBCODE
        INNER JOIN INTERPRO.CLAN_MEMBER M
            ON C.CLAN_AC = M.CLAN_AC
        """
    )

    clans = {}
    for row in cur:
        accession = row[0]
        name = row[1]
        descr = row[2]
        database = row[3]
        member_acc = row[4]
        seq_length = row[5]
        score = row[6]

        try:
            c = clans[accession]
        except KeyError:
            c = clans[accession] = {
                "accession": accession,
                "name": name,
                "description": descr,
                "database": database,
                "members": []
            }

        c["members"].append((member_acc, score, seq_length))

    return clans
def import_from_ispro(cur: Cursor, src: str, dst: str):
    oracle.drop_mview(cur, dst)
    oracle.drop_table(cur, dst, purge=True)
    cur.execute(
        f"""
        CREATE TABLE IPRSCAN.{dst} NOLOGGING
        AS
        SELECT *
        FROM IPRSCAN.{src}@ISPRO
        """
    )

    # Use the remote table name for indexes to have fewer characters
    # (no prefix), as Oracle < 12.2 does not allow object names longer
    # than 30 characters
    cur.execute(
        f"""
        CREATE INDEX {src}$ID
        ON IPRSCAN.{dst} (ANALYSIS_ID)
        NOLOGGING
        """
    )
    cur.execute(
        f"""
        CREATE INDEX {src}$UPI
        ON IPRSCAN.{dst} (UPI)
        NOLOGGING
        """
    )
def update_proteins(cur: cx_Oracle.Cursor):
    logger.info("creating table PROTEIN")
    oracle.drop_mview(cur, "UNIPARC.PROTEIN")
    oracle.drop_table(cur, "UNIPARC.PROTEIN", purge=True)
    cur.execute(
        """
        CREATE TABLE UNIPARC.PROTEIN
        TABLESPACE UNIPARC_TAB
        NOLOGGING
        AS
        SELECT UPI, TIMESTAMP, USERSTAMP, CRC64, LEN, SEQ_SHORT, SEQ_LONG, MD5
        FROM UNIPARC.PROTEIN@UAREAD
        """
    )
    cur.execute("GRANT SELECT ON UNIPARC.PROTEIN TO PUBLIC")

    logger.info("creating index PK_PROTEIN")
    cur.execute(
        """
        CREATE UNIQUE INDEX PK_PROTEIN
        ON UNIPARC.PROTEIN (UPI)
        TABLESPACE UNIPARC_IND
        """
    )

    logger.info("gathering statistics on table PROTEIN")
    oracle.gather_stats(cur, "UNIPARC", "PROTEIN")
def update_xrefs(cur: cx_Oracle.Cursor):
    logger.info("creating table XREF")
    oracle.drop_table(cur, "UNIPARC.XREF", purge=True)
    oracle.drop_table(cur, "UNIPARC.XREF_OLD", purge=True)
    cur.execute(
        """
        CREATE TABLE UNIPARC.XREF
        TABLESPACE UNIPARC_TAB
        NOLOGGING
        AS
        SELECT UPI, AC, DBID, DELETED, VERSION
        FROM UNIPARC.XREF@UAREAD
        """
    )
    cur.execute("GRANT SELECT ON UNIPARC.XREF TO PUBLIC")

    for col in ["UPI", "AC", "DBID"]:
        logger.info(f"creating index I_XREF${col}")
        cur.execute(
            f"""
            CREATE INDEX I_XREF${col}
            ON UNIPARC.XREF ({col})
            TABLESPACE UNIPARC_IND
            """
        )

    logger.info("gathering statistics on table XREF")
    oracle.gather_stats(cur, "UNIPARC", "XREF")
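# A hedged end-to-end sketch of how the UniParc refresh functions above might
# be driven; the connection string is a placeholder and the real pipeline may
# order or wrap these steps differently (DDL auto-commits in Oracle anyway).
import cx_Oracle

def refresh_uniparc(url: str):
    con = cx_Oracle.connect(url)  # e.g. "user/password@host:1521/service"
    cur = con.cursor()
    try:
        update_databases(cur)
        update_proteins(cur)
        update_xrefs(cur)
    finally:
        cur.close()
        con.close()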