def _execute_sql(self, cursor: cx_Oracle.Cursor, statement: str,
                     params: Dict[str, Any]) -> cx_Oracle.Cursor:
        """ Execute SQL query on Oracle DB using active connection.

        *Args*:\n
            _cursor_: cursor object.\n
            _statement_: SQL query to be executed.\n
            _params_: SQL query parameters.\n

        *Returns:*\n
            Query results.
        """
        statement_with_params = self._replace_parameters_in_statement(
            statement, params)
        _connection_info = '@'.join(
            (cursor.connection.username, cursor.connection.dsn))
        data = self.wrap_into_html_details(
            statement=statement_with_params,
            summary=f'Executed PL/SQL statement on {_connection_info}')
        logger.info(data, html=True)
        cursor.prepare(statement)
        self.last_executed_statement = self._replace_parameters_in_statement(
            statement, params)
        self.last_used_connection_index = self._cache.current_index
        cursor.execute(None, params)
Ejemplo n.º 2
0
def get_swissprot2enzyme(cur: cx_Oracle.Cursor) -> Dict[str, List[str]]:
    cur.execute("""
        SELECT DISTINCT E.ACCESSION, D.DESCR
        FROM SPTR.DBENTRY@SWPREAD E
        INNER JOIN SPTR.DBENTRY_2_DESC@SWPREAD D
          ON E.DBENTRY_ID = D.DBENTRY_ID
        INNER JOIN SPTR.CV_DESC@SWPREAD C
          ON D.DESC_ID = C.DESC_ID
        WHERE E.ENTRY_TYPE = 0            -- Swiss-Prot
          AND E.MERGE_STATUS != 'R'       -- not 'Redundant'
          AND E.DELETED = 'N'             -- not deleted
          AND E.FIRST_PUBLIC IS NOT NULL  -- published
          AND C.SUBCATG_TYPE = 'EC'
        """)

    # Accepts X.X.X.X or X.X.X.-
    # Does not accept preliminary EC numbers (e.g. X.X.X.nX)
    prog = re.compile("(\d+\.){3}(\d+|-)$")
    proteins = {}
    for acc, ecno in cur:
        if prog.match(ecno):
            try:
                proteins[acc].append(ecno)
            except KeyError:
                proteins[acc] = [ecno]

    return proteins
Ejemplo n.º 3
0
def _get_citations(cur: cx_Oracle.Cursor) -> dict:
    citations = {}
    cur.execute(
        """
        SELECT PUB_ID, PUBMED_ID, ISBN, VOLUME, ISSUE,
          YEAR, TITLE, URL, RAWPAGES, MEDLINE_JOURNAL,
          ISO_JOURNAL, AUTHORS, DOI_URL
        FROM INTERPRO.CITATION
        """
    )
    for row in cur:
        authors = []
        if row[11]:
            for name in row[11].split(','):
                authors.append(name.strip())

        citations[row[0]] = {
            "PMID": row[1],
            "ISBN": row[2],
            "volume": row[3],
            "issue": row[4],
            "year": row[5],
            "title": row[6],
            "URL": row[7],
            "raw_pages": row[8],
            "medline_journal": row[9],
            "ISO_journal": row[10],
            "authors": authors,
            "DOI_URL": row[12]
        }
    return citations
Ejemplo n.º 4
0
def get_chain_taxonomy(cur: cx_Oracle.Cursor) -> dict:
    cur.execute(
        """
        SELECT DISTINCT 
          ASYM.ENTRY_ID, 
          ASYM.AUTH_ASYM_ID, 
          SRC.TAX_ID
        FROM PDBE.STRUCT_ASYM@PDBE_LIVE ASYM
        INNER JOIN PDBE.ENTITY_SRC@PDBE_LIVE SRC
          ON ASYM.ENTRY_ID = SRC.ENTRY_ID AND ASYM.ENTITY_ID = SRC.ENTITY_ID
        """
    )

    structures = {}
    for pdb_id, chain, tax_id in cur:
        pdb_acc = pdb_id + '_' + chain

        if pdb_acc in structures:
            s = structures[pdb_acc]
        else:
            s = structures[pdb_acc] = {
                "id": pdb_id,
                "chain": chain,
                "taxa": set()
            }
        s["taxa"].add(tax_id)

    return structures
Ejemplo n.º 5
0
def check_proteins(cur: cx_Oracle.Cursor) -> int:
    num_errors = 0
    cur.execute("""
        SELECT PROTEIN_AC
        FROM INTERPRO.PROTEIN
        MINUS
        SELECT AC
        FROM UNIPARC.XREF
        WHERE DBID IN (2, 3)   -- Swiss-Prot/TrEMBL
        AND DELETED = 'N'      -- Not deleted
        """)

    for accession, in cur:
        logger.error(f"missing: {accession}")
        num_errors += 1

    cur.execute("""
        SELECT IP.PROTEIN_AC, IP.CRC64, UP.CRC64
        FROM INTERPRO.PROTEIN IP
        INNER JOIN UNIPARC.XREF UX
          ON IP.PROTEIN_AC = UX.AC
          AND UX.DBID IN (2, 3)   -- Swiss-Prot/TrEMBL
          AND UX.DELETED = 'N'    -- Not deleted
        INNER JOIN UNIPARC.PROTEIN UP
          ON UX.UPI = UP.UPI
        WHERE IP.CRC64 != UP.CRC64
        """)

    for accession, crc64_1, crc64_2 in cur:
        logger.error(f"CRC64 mismatch: {accession}: {crc64_1} / {crc64_2}")
        num_errors += 1

    return num_errors
Ejemplo n.º 6
0
def _export_interpro2go2uniprot(cur: cx_Oracle.Cursor, output: str):
    cur.execute(
        """
        SELECT DISTINCT IG.ENTRY_AC, IG.GO_ID, M.PROTEIN_AC
        FROM INTERPRO.MATCH M
        INNER JOIN INTERPRO.ENTRY2METHOD EM
          ON M.METHOD_AC = EM.METHOD_AC
        INNER JOIN INTERPRO.ENTRY E
          ON EM.ENTRY_AC = E.ENTRY_AC
        INNER JOIN INTERPRO.INTERPRO2GO IG
          ON E.ENTRY_AC = IG.ENTRY_AC
        WHERE E.CHECKED = 'Y'
        """
    )

    tmp_path = f"{output}.tmp"
    with open(tmp_path, "wt") as fh:
        fh.write("#InterPro accession\tGO ID\tUniProt accession\n")

        for row in cur:
            fh.write('\t'.join(row) + '\n')

    try:
        os.remove(output)
    except FileNotFoundError:
        pass
    finally:
        os.rename(tmp_path, output)
        os.chmod(output, 0o775)
Ejemplo n.º 7
0
def get_subpartitions(cur: Cursor, schema: str, table: str,
                      partition: str) -> List[dict]:
    cur.execute(
        """
        SELECT SP.SUBPARTITION_NAME, SP.SUBPARTITION_POSITION, SP.HIGH_VALUE,
               K.COLUMN_NAME, K.COLUMN_POSITION
        FROM ALL_TAB_SUBPARTITIONS SP
        INNER JOIN ALL_SUBPART_KEY_COLUMNS K
            ON SP.TABLE_OWNER = K.OWNER
            AND SP.TABLE_NAME = K.NAME
        WHERE SP.TABLE_OWNER = :1
        AND SP.TABLE_NAME = :2
        AND SP.PARTITION_NAME = :3
        """, (schema, table, partition))

    subpartitions = {}
    for row in cur:
        part_name = row[0]
        if part_name in subpartitions:
            raise ValueError("Multi-column partitioning keys not supported")

        subpartitions[part_name] = {
            "name": part_name,
            "position": row[1],
            "value": row[2],
            "column": row[3]
        }

    return sorted(subpartitions.values(), key=lambda x: x["position"])
Ejemplo n.º 8
0
def _get_taxon2superkingdom(cur: cx_Oracle.Cursor) -> Dict[int, str]:
    # Load all taxa
    cur.execute("""
        SELECT TAX_ID, SCIENTIFIC_NAME, RANK, PARENT_ID
        FROM INTERPRO.ETAXI
        """)
    taxa = {}
    for tax_id, name, rank, parent_id in cur:
        if tax_id in (1, 131567):
            """
            Skip root and meta-superkingdom (131567) which contains:
                * Bacteria (2)
                * Archaea (2157)
                * Eukaryota (2759)
            """
            continue
        elif parent_id in (1, 131567):
            rank = "superkingdom"
            parent_id = None

        taxa[tax_id] = (name, rank, parent_id)

    # For each taxon, find its root (i.e. superkingdom)
    taxon2superkingdom = {}
    for tax_id in taxa:
        name, rank, parent_id = taxa[tax_id]

        while parent_id is not None:
            name, rank, parent_id = taxa[parent_id]
            if rank == "superkingdom":
                taxon2superkingdom[tax_id] = name
                break

    return taxon2superkingdom
Ejemplo n.º 9
0
def get_indexes(cur: Cursor, owner: str, name: str) -> List[dict]:
    cur.execute(
        """
        SELECT I.OWNER, I.INDEX_NAME, I.UNIQUENESS, I.TABLESPACE_NAME, 
               I.LOGGING, I.STATUS, IC.COLUMN_NAME, IC.DESCEND
        FROM ALL_INDEXES I
        INNER JOIN ALL_IND_COLUMNS IC
          ON I.OWNER = IC.INDEX_OWNER
          AND I.INDEX_NAME = IC.INDEX_NAME
          AND I.TABLE_NAME = IC.TABLE_NAME
        WHERE I.TABLE_OWNER = :1
        AND I.TABLE_NAME = :2
        ORDER BY I.INDEX_NAME, IC.COLUMN_POSITION
        """, (owner, name))

    indexes = {}
    for row in cur:
        name = row[1]
        try:
            index = indexes[name]
        except KeyError:
            index = indexes[name] = {
                "owner": row[0],
                "name": name,
                "is_unique": row[2] == "UNIQUE",
                "tablespace": row[3],
                "logging": row[4] == "YES",
                "unusable": row[5] == "UNUSABLE",  # can be VALID or N/A
                "columns": []
            }

        index["columns"].append({"name": row[6], "order": row[7]})

    return list(indexes.values())
Ejemplo n.º 10
0
def get_sig_protein_counts(cur: cx_Oracle.Cursor,
                           dbid: str) -> Dict[str, Dict[str, int]]:
    """
    Return the number of protein matches by each member database signature.
    Only complete sequences are considered

    :param cur: Oracle cursor object
    :param dbid: member database identifier
    :return: dictionary
    """
    partition = MATCH_PARTITIONS[dbid]
    taxon2superkingdom = _get_taxon2superkingdom(cur)
    cur.execute(f"""
        SELECT M.METHOD_AC, P.TAX_ID, COUNT(DISTINCT P.PROTEIN_AC)
        FROM INTERPRO.MATCH PARTITION ({partition}) M
        INNER JOIN INTERPRO.PROTEIN P
            ON P.PROTEIN_AC = M.PROTEIN_AC
        WHERE P.FRAGMENT = 'N'
        GROUP BY M.METHOD_AC, P.TAX_ID
        """)
    counts = {}
    for sig_acc, tax_id, n_proteins in cur:
        try:
            sig = counts[sig_acc]
        except KeyError:
            sig = counts[sig_acc] = {}

        superkingdom = taxon2superkingdom[tax_id]
        try:
            sig[superkingdom] += n_proteins
        except KeyError:
            sig[superkingdom] = n_proteins

    return counts
Ejemplo n.º 11
0
def get_features(cur: cx_Oracle.Cursor) -> dict:
    cur.execute(
        """
        SELECT M.METHOD_AC, M.NAME, D.DBSHORT, V.VERSION, EVI.ABBREV
        FROM INTERPRO.FEATURE_METHOD M
        INNER JOIN INTERPRO.CV_DATABASE D
          ON M.DBCODE = D.DBCODE
        INNER JOIN INTERPRO.DB_VERSION V
          ON D.DBCODE = V.DBCODE
        INNER JOIN INTERPRO.IPRSCAN2DBCODE I2D
          ON D.DBCODE = I2D.DBCODE
        INNER JOIN INTERPRO.CV_EVIDENCE EVI
          ON I2D.EVIDENCE = EVI.CODE
        """
    )
    features = {}
    for row in cur:
        features[row[0]] = {
            "accession": row[0],
            "name": row[1],
            "database": row[2],
            "version": row[3],
            "evidence": row[4]
        }
    return features
Ejemplo n.º 12
0
def truncate_table(cur: Cursor, name: str, reuse_storage: bool = False):
    if reuse_storage:
        sql = f"TRUNCATE TABLE {name} REUSE STORAGE"
    else:
        sql = f"TRUNCATE TABLE {name}"

    cur.execute(sql)
Ejemplo n.º 13
0
def toggle_constraint(cur: Cursor, table: str, constraint: str, enable: bool):
    if enable:
        sql = f"ALTER TABLE {table} ENABLE CONSTRAINT {constraint}"
    else:
        sql = f"ALTER TABLE {table} DISABLE CONSTRAINT {constraint}"

    cur.execute(sql)
Ejemplo n.º 14
0
def update_databases(cur: cx_Oracle.Cursor):
    logger.info("creating table CV_DATABASE")
    oracle.drop_table(cur, "UNIPARC.CV_DATABASE", purge=True)
    cur.execute("""
        CREATE TABLE UNIPARC.CV_DATABASE
        TABLESPACE UNIPARC_TAB
        NOLOGGING
        AS
        SELECT *
        FROM UNIPARC.CV_DATABASE@UAREAD
        """)
    cur.execute("GRANT SELECT ON UNIPARC.CV_DATABASE TO PUBLIC")
    cur.execute("""
        CREATE UNIQUE INDEX PK_CV_DATABASE
        ON UNIPARC.CV_DATABASE (ID)
        TABLESPACE UNIPARC_IND
        NOLOGGING
        """)
    cur.execute("""
        CREATE UNIQUE INDEX UQ_CV_DATABASE$DESCR
        ON UNIPARC.CV_DATABASE (DESCR)
        TABLESPACE UNIPARC_IND
        NOLOGGING
        """)
    cur.execute("""
        ALTER TABLE UNIPARC.CV_DATABASE
        ADD (
          CONSTRAINT PK_CV_DATABASE PRIMARY KEY(ID)
            USING INDEX PK_CV_DATABASE,
          CONSTRAINT UQ_CV_DATABASE$DESCR UNIQUE (DESCR)
            USING INDEX UQ_CV_DATABASE$DESCR
        )
        """)
    oracle.gather_stats(cur, "UNIPARC", "CV_DATABASE")
Ejemplo n.º 15
0
def _get_entries_protein_counts(
        cur: cx_Oracle.Cursor) -> Dict[str, Dict[str, int]]:
    """
    Return the number of protein matched by each InterPro entry.
    Only complete sequences are considered.

    :param cur: Oracle cursor object
    :return: dictionary
    """
    taxon2superkingdom = _get_taxon2superkingdom(cur)
    cur.execute("""
        SELECT EM.ENTRY_AC, P.TAX_ID, COUNT(DISTINCT P.PROTEIN_AC)
        FROM INTERPRO.PROTEIN P
        INNER JOIN INTERPRO.MATCH M ON P.PROTEIN_AC = M.PROTEIN_AC
        INNER JOIN INTERPRO.ENTRY2METHOD EM ON EM.METHOD_AC = M.METHOD_AC
        WHERE P.FRAGMENT = 'N'
        GROUP BY EM.ENTRY_AC, P.TAX_ID
        """)
    counts = {}
    for entry_acc, tax_id, n_proteins in cur:
        try:
            e = counts[entry_acc]
        except KeyError:
            e = counts[entry_acc] = {}

        superkingdom = taxon2superkingdom[tax_id]
        try:
            e[superkingdom] += n_proteins
        except KeyError:
            e[superkingdom] = n_proteins

    return counts
Ejemplo n.º 16
0
def gather_stats(cur: Cursor,
                 schema: str,
                 table: str,
                 partition: Optional[str] = None):
    if partition:
        args = (schema, table, partition)
    else:
        args = (schema, table)

    cur.callproc("DBMS_STATS.GATHER_TABLE_STATS", args)
Ejemplo n.º 17
0
def drop_mview(cur: Cursor, name: str):
    try:
        cur.execute(f"DROP MATERIALIZED VIEW {name}")
    except DatabaseError as exc:
        error, = exc.args

        # ORA-08103: object no longer exists
        # ORA-12003: materialized view does not exist
        if error.code not in (8103, 12003):
            raise exc
Ejemplo n.º 18
0
def drop_table(cur: Cursor, name: str, purge: bool = False):
    if purge:
        sql = f"DROP TABLE {name} PURGE"
    else:
        sql = f"DROP TABLE {name}"

    try:
        cur.execute(sql)
    except DatabaseError as exc:
        error, = exc.args

        # ORA-00942: table or view does not exist
        # ORA-08103: object no longer exists
        if error.code not in (942, 8103):
            raise exc
Ejemplo n.º 19
0
def insertClob(tableName: str, clob, cur: cx_Oracle.Cursor):
    """
  Inserts no more than 4GiB of data into the DB at a time.
  Args:
    clob (clob): https://doc.ispirer.com/sqlways/Output/SQLWays-1-124.html – The CBLOB data type stores character large objects. CLOB can store up to 4 gigabytes of character data.
  """
    if debug:
        print('running insertClob with maxBytesPerInsert: ', maxBytesPerInsert)
    if len(clob) > maxBytesPerInsert:
        clob = generateClob(maxBytesPerInsert)

    clobInsert = 'insert into {tableName} values (:clob)'.format_map(locals())
    cur.execute(clobInsert, {'clob': clob})
    if debug:
        print('returning from insertClob with maxBytesPerInsert: ',
              maxBytesPerInsert)
Ejemplo n.º 20
0
def _get_interpro_releases(cur: cx_Oracle.Cursor) -> tuple:
    cur.execute(
        """
        SELECT VERSION, FILE_DATE
        FROM INTERPRO.DB_VERSION_AUDIT
        WHERE DBCODE = 'I'
        ORDER BY FILE_DATE
        """
    )
    rel_numbers = []
    rel_dates = []
    for row in cur:
        rel_numbers.append(row[0])
        rel_dates.append(row[1])

    return rel_numbers, rel_dates
Ejemplo n.º 21
0
def _get_name_history(cur: cx_Oracle.Cursor) -> dict:
    release_numbers, release_dates = _get_interpro_releases(cur)

    cur.execute(
        """
        SELECT ENTRY_AC, TRIM(NAME) AS NAME, TIMESTAMP
        FROM INTERPRO.ENTRY_AUDIT
        WHERE NAME IS NOT NULL
        ORDER BY TIMESTAMP
        """
    )

    # Get all names assigned to each entry
    entries = {}
    for acc, name, timestamp in cur:
        try:
            entries[acc].append((name, timestamp))
        except KeyError:
            entries[acc] = [(name, timestamp)]

    for acc, names in entries.items():
        # Select the last name given to an entry before each release
        releases = {}
        for name, timestamp in names:
            i = bisect.bisect_left(release_dates, timestamp)
            rel = release_numbers[i - 1]

            if rel not in releases or timestamp > releases[rel]["time"]:
                releases[rel] = {
                    "name": name,
                    "time": timestamp
                }

        names = []
        for rel in sorted(releases.values(), key=lambda x: x["time"]):
            last_name = rel["name"]

            if last_name not in names:
                names.append(last_name)

        entries[acc] = names

    return entries
Ejemplo n.º 22
0
def iter_alignments(cur: cx_Oracle.Cursor):
    cur.execute(
        """
        SELECT QUERY_AC, TARGET_AC, EVALUE, DOMAINS
        FROM INTERPRO.CLAN_MATCH
        """
    )

    for query, target, evalue, clob in cur:
        # DOMAINS is a LOB object: need to call read()

        domains = []
        for start, end in json.loads(clob.read()):
            domains.append({
                "start": start,
                "end": end
            })

        yield query, target, evalue, domains
Ejemplo n.º 23
0
def get_signatures(cur: cx_Oracle.Cursor) -> dict:
    cur.execute(
        """
        SELECT M.METHOD_AC, M.NAME, DB.DBSHORT, EVI.ABBREV,
               E2M.ENTRY_AC, E2M.NAME, E2M.ABBREV, E2M.PARENT_AC
        FROM INTERPRO.METHOD M
        INNER JOIN  INTERPRO.CV_DATABASE DB
          ON M.DBCODE = DB.DBCODE
        INNER JOIN  INTERPRO.IPRSCAN2DBCODE I2D
          ON M.DBCODE = I2D.DBCODE
        INNER JOIN INTERPRO.CV_EVIDENCE EVI
          ON I2D.EVIDENCE = EVI.CODE
        LEFT OUTER JOIN (
          SELECT E2M.METHOD_AC, E.ENTRY_AC, E.NAME, ET.ABBREV, E2E.PARENT_AC
          FROM INTERPRO.ENTRY E
          INNER JOIN INTERPRO.ENTRY2METHOD E2M
            ON E.ENTRY_AC = E2M.ENTRY_AC
          INNER JOIN INTERPRO.CV_ENTRY_TYPE ET
            ON E.ENTRY_TYPE = ET.CODE
          LEFT OUTER JOIN INTERPRO.ENTRY2ENTRY E2E
            ON E.ENTRY_AC = E2E.ENTRY_AC
          WHERE E.CHECKED = 'Y'
        ) E2M
          ON M.METHOD_AC = E2M.METHOD_AC
        """
    )
    signatures = {}
    for row in cur:
        signatures[row[0]] = {
            "accession": row[0],
            "name": row[1] or row[0],
            "database": row[2],
            "evidence": row[3],
            "interpro": [
                ("id", row[4]),
                ("name", row[5]),
                ("type", row[6]),
                ("parent_id", row[7])
            ] if row[4] else None
        }
    return signatures
Ejemplo n.º 24
0
def get_child_tables(cur: Cursor, schema: str, name: str) -> List[tuple]:
    cur.execute(
        """
        SELECT TABLE_NAME, CONSTRAINT_NAME, COLUMN_NAME
        FROM ALL_CONS_COLUMNS
        WHERE OWNER = :schema
        AND CONSTRAINT_NAME IN (
          SELECT CONSTRAINT_NAME
          FROM ALL_CONSTRAINTS
          WHERE CONSTRAINT_TYPE = 'R'             -- Referential
          AND R_CONSTRAINT_NAME IN (
            SELECT CONSTRAINT_NAME
            FROM ALL_CONSTRAINTS
            WHERE CONSTRAINT_TYPE IN ('P', 'U')   -- Primary/Unique
            AND OWNER = :schema
            AND TABLE_NAME = :name
          )
        )
        """, dict(schema=schema, name=name))

    return cur.fetchall()
Ejemplo n.º 25
0
def get_swissprot2reactome(cur: cx_Oracle.Cursor) -> Dict[str, List[tuple]]:
    cur.execute("""
        SELECT DISTINCT E.ACCESSION, D.PRIMARY_ID, D.SECONDARY_ID
        FROM SPTR.DBENTRY@SWPREAD E
        INNER JOIN SPTR.DBENTRY_2_DATABASE@SWPREAD D
          ON E.DBENTRY_ID = D.DBENTRY_ID 
          AND D.DATABASE_ID = 'GK'          -- Reactome
        WHERE E.ENTRY_TYPE = 0              -- Swiss-Prot
          AND E.MERGE_STATUS != 'R'         -- not 'Redundant'
          AND E.DELETED = 'N'               -- not deleted
          AND E.FIRST_PUBLIC IS NOT NULL    -- published
        """)

    proteins = {}
    for uniprot_acc, pathway_id, pathway_name in cur:
        try:
            proteins[uniprot_acc].append((pathway_id, pathway_name))
        except KeyError:
            proteins[uniprot_acc] = [(pathway_id, pathway_name)]

    return proteins
Ejemplo n.º 26
0
    def is_ready(self, cur: Cursor, max_upi: str) -> bool:
        cur.execute(
            """
            SELECT SUM(CNT)
            FROM (
                SELECT COUNT(*) AS CNT
                FROM IPRSCAN.IPM_RUNNING_JOBS@ISPRO
                WHERE ANALYSIS_ID = :analysisid
                  AND JOB_START <= :maxupi
                UNION ALL
                SELECT COUNT(*) AS CNT
                FROM IPRSCAN.IPM_COMPLETED_JOBS@ISPRO
                WHERE ANALYSIS_ID = :analysisid
                  AND JOB_START <= :maxupi AND PERSISTED < :persisted
                UNION ALL
                SELECT COUNT(*) AS CNT
                FROM IPRSCAN.IPM_PERSISTED_JOBS@ISPRO
                WHERE ANALYSIS_ID = :analysisid
                  AND JOB_START <= :maxupi AND PERSISTED < :persisted
            )
            """,
            dict(analysisid=self.id, persisted=self.persisted, maxupi=max_upi))
        if cur.fetchone()[0]:
            return False

        cur.execute(
            """
            SELECT MAX(JOB_END)
            FROM IPRSCAN.IPM_PERSISTED_JOBS@ISPRO
            WHERE ANALYSIS_ID = :1 AND PERSISTED >= :2
            """, (self.id, self.persisted))
        row = cur.fetchone()
        return row and row[0] >= max_upi
Ejemplo n.º 27
0
def rebuild_index(cur: Cursor, name: str, parallel: bool = False):
    if parallel:
        cur.execute(f"ALTER INDEX {name} REBUILD PARALLEL")

        # Prevent the index to be accessed in parallel by default
        cur.execute(f"ALTER INDEX {name} NOPARALLEL")
    else:
        cur.execute(f"ALTER INDEX {name} REBUILD")
Ejemplo n.º 28
0
def get_clans(cur: cx_Oracle.Cursor) -> Dict[str, dict]:
    cur.execute(
        """
        SELECT
          C.CLAN_AC, C.NAME, C.DESCRIPTION, LOWER(D.DBSHORT), M.MEMBER_AC, 
          M.LEN, M.SCORE
        FROM INTERPRO.CLAN C
        INNER JOIN INTERPRO.CV_DATABASE D
          ON C.DBCODE = D.DBCODE
        INNER JOIN INTERPRO.CLAN_MEMBER M
          ON C.CLAN_AC = M.CLAN_AC
        """
    )

    clans = {}
    for row in cur:
        accession = row[0]
        name = row[1]
        descr = row[2]
        database = row[3]
        member_acc = row[4]
        seq_length = row[5]
        score = row[6]

        try:
            c = clans[accession]
        except KeyError:
            c = clans[accession] = {
                "accession": accession,
                "name": name,
                "description": descr,
                "database": database,
                "members": []
            }
        finally:
            c["members"].append((member_acc, score, seq_length))

    return clans
Ejemplo n.º 29
0
def import_from_ispro(cur: Cursor, src: str, dst: str):
    oracle.drop_mview(cur, dst)
    oracle.drop_table(cur, dst, purge=True)
    cur.execute(f"""
        CREATE TABLE IPRSCAN.{dst} NOLOGGING
        AS
        SELECT *
        FROM IPRSCAN.{src}@ISPRO
        """)
    """
    Use remote table name to have fewer characters (no prefix)
    as Oracle < 12.2 do not allow object names longer than 30 characters
    """
    cur.execute(f"""
        CREATE INDEX {src}$ID
        ON IPRSCAN.{dst} (ANALYSIS_ID)
        NOLOGGING
        """)
    cur.execute(f"""
        CREATE INDEX {src}$UPI
        ON IPRSCAN.{dst} (UPI)
        NOLOGGING
        """)
Ejemplo n.º 30
0
def update_proteins(cur: cx_Oracle.Cursor):
    logger.info("creating table PROTEIN")
    oracle.drop_mview(cur, "UNIPARC.PROTEIN")
    oracle.drop_table(cur, "UNIPARC.PROTEIN", purge=True)
    cur.execute("""
        CREATE TABLE UNIPARC.PROTEIN
        TABLESPACE UNIPARC_TAB
        NOLOGGING
        AS
        SELECT UPI, TIMESTAMP, USERSTAMP, CRC64, LEN, SEQ_SHORT, SEQ_LONG, MD5
        FROM UNIPARC.PROTEIN@UAREAD
        """)
    cur.execute("GRANT SELECT ON UNIPARC.PROTEIN TO PUBLIC")

    logger.info("creating index PK_PROTEIN")
    cur.execute("""
        CREATE UNIQUE INDEX PK_PROTEIN
        ON UNIPARC.PROTEIN (UPI)
        TABLESPACE UNIPARC_IND
        """)

    logger.info("gathering statistics on table PROTEIN")
    oracle.gather_stats(cur, "UNIPARC", "PROTEIN")