Esempio n. 1
0
def get_child_tables(cur: Cursor, schema: str, name: str) -> List[tuple]:
    cur.execute(
        """
        SELECT TABLE_NAME, CONSTRAINT_NAME, COLUMN_NAME
        FROM ALL_CONS_COLUMNS
        WHERE OWNER = :schema
        AND CONSTRAINT_NAME IN (
          SELECT CONSTRAINT_NAME
          FROM ALL_CONSTRAINTS
          WHERE CONSTRAINT_TYPE = 'R'             -- Referential
          AND R_CONSTRAINT_NAME IN (
            SELECT CONSTRAINT_NAME
            FROM ALL_CONSTRAINTS
            WHERE CONSTRAINT_TYPE IN ('P', 'U')   -- Primary/Unique
            AND OWNER = :schema
            AND TABLE_NAME = :name
          )
        )
        """, dict(schema=schema, name=name))

    return cur.fetchall()
Esempio n. 2
0
    def fetchCityData(self, con: cx_Oracle.Connection, cur: cx_Oracle.Cursor,
                      startTime: dt.datetime, endTime: dt.datetime,
                      tag: str) -> pd.core.frame.DataFrame:
        """fethc weather data of all station of a particular entity

        Args:
            con (cx_Oracle.Connection): [connection]
            cur (cx_Oracle.Cursor): [cursor]
            startTime (dt.datetime): [startime]
            endTime (dt.datetime): [endTIme]
            tag (str): [entityTag]

        Returns:
            pd.core.frame.DataFrame: [return weather dataframe of all station of a particular entity]
        """
        guj, mp, mah, chh, goa, dd, dnh = False, False, False, False, False, False, False
        if tag == "WRLDCMP.SCADA1.A0047000":
            guj, mp, mah, chh, goa, dd, dnh = True, True, True, True, True, True, True
        elif tag == "WRLDCMP.SCADA1.A0046957":
            guj = True
        elif tag == "WRLDCMP.SCADA1.A0046978":
            mp = True
        elif tag == "WRLDCMP.SCADA1.A0046980":
            mah = True
        elif tag == "WRLDCMP.SCADA1.A0046945":
            chh = True
        elif tag == "WRLDCMP.SCADA1.A0046962":
            goa = True
        elif tag == "WRLDCMP.SCADA1.A0046948":
            dd = True
        elif tag == "WRLDCMP.SCADA1.A0046953":
            dnh = True
        try:
            df2 = pd.DataFrame()
            if guj:
                cur.execute(
                    '''select time,temperature from solar where city='station_1' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_1'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_2' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_2'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_3' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_3'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_4' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_4'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_5' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_5'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_6' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_6'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_7' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_7'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_8' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_8'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_9' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_9'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

            if mp:
                cur.execute(
                    '''select time,temperature from solar where city='station_10' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_10'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_11' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_11'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar  where city='station_12' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_12'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar  where city='station_13' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_13'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar  where city='station_14' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_14'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar  where city='station_15' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_15'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar  where city='station_16' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_16'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

            if mah:
                cur.execute(
                    '''select time,temperature from solar where city='station_17' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_17'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_18' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_18'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_19' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_19'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_20' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_20'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_21' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_21'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_22' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_22'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_23' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_23'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_24' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_24'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_25' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_25'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_26' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_26'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_27' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_27'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_28' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_28'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_29' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_29'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_30' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_30'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_31' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_31'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_32' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_32'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_33' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_33'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_34' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_34'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_35' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_35'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_36' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_36'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_37' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_37'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

            if chh:
                cur.execute(
                    '''select time,temperature from solar where city='station_38' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_38'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_39' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_39'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_40' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_40'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

            if goa:
                cur.execute(
                    '''select time,temperature from solar where city='station_41' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_41'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

            if dd:
                cur.execute(
                    '''select time,temperature from solar where city='station_42' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_42'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

                cur.execute(
                    '''select time,temperature from solar where city='station_43' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_43'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)

            if dnh:
                cur.execute(
                    '''select time,temperature from solar where city='station_44' and (time between :1 and :2 ) order by time''',
                    (startTime, endTime))
                df = pd.DataFrame(cur.fetchall(),
                                  columns=['time', 'station_44'])
                df.set_index('time', inplace=True)
                df2 = pd.concat([df2, df], axis=1)
                # print(df2)
        except Exception as e:
            print(e)
            return pd.DataFrame()
        return df2
Esempio n. 3
0
def _export_pdb2interpro2go2uniprot(cur: cx_Oracle.Cursor, output: str):
    # PDBe sequences from UniParc
    cur.execute(
        """
        SELECT UPI, AC
        FROM UNIPARC.XREF
        WHERE DBID = 21
        AND DELETED = 'N'
        """
    )
    sequences = {}
    for upi, pdb_acc in cur:
        if upi in sequences:
            sequences[upi]["structures"].add(pdb_acc)
        else:
            sequences[upi] = {
                "structures": {pdb_acc},
                "entries": set()
            }

    # Integrated signatures whose entry is checked and has GO terms
    cur.execute(
        """
        SELECT METHOD_AC, ENTRY_AC
        FROM INTERPRO.ENTRY2METHOD
        WHERE ENTRY_AC IN (
          SELECT ENTRY_AC
          FROM INTERPRO.ENTRY
          WHERE CHECKED = 'Y'
          INTERSECT
          SELECT DISTINCT ENTRY_AC
          FROM INTERPRO.INTERPRO2GO
        )
        """
    )
    signatures = dict(cur.fetchall())

    # GO terms in InterPro
    cur.execute(
        """
        SELECT ENTRY_AC, GO_ID
        FROM INTERPRO.INTERPRO2GO
        WHERE ENTRY_AC IN (
          SELECT ENTRY_AC
          FROM INTERPRO.ENTRY
          WHERE CHECKED = 'Y'
        )
        """
    )
    entries = {}
    for entry_acc, go_id in cur:
        if entry_acc in entries:
            entries[entry_acc].add(go_id)
        else:
            entries[entry_acc] = {go_id}

    # PDBe matches
    cur.execute(
        """
        SELECT DISTINCT UPI, METHOD_AC
        FROM IPRSCAN.MV_IPRSCAN
        WHERE UPI IN (
            SELECT UPI
            FROM UNIPARC.XREF
            WHERE DBID = 21
            AND DELETED = 'N'
        )
        """
    )
    for upi, signature_acc in cur:
        try:
            entry_acc = signatures[signature_acc]
        except KeyError:
            pass
        else:
            sequences[upi]["entries"].add(entry_acc)

    # PDBe taxonomy
    structures = pdbe.get_chain_taxonomy(cur)

    # UniProt accessions
    cur.execute(
        """
        SELECT DISTINCT A.AC, B.AC
        FROM UNIPARC.XREF A
        LEFT OUTER JOIN UNIPARC.XREF B ON A.UPI = B.UPI
        WHERE A.DBID = 21
        AND A.DELETED = 'N'
        AND B.DBID IN (2, 3)
        AND B.DELETED = 'N'
        """
    )
    pdb2uniprot = {}
    for pdb_acc, protein_acc in cur:
        if not protein_acc:
            continue
        elif pdb_acc in pdb2uniprot:
            pdb2uniprot[pdb_acc].add(protein_acc)
        else:
            pdb2uniprot[pdb_acc] = {protein_acc}

    tmp_path = f"{output}.tmp"
    with open(tmp_path, "wt") as fh:
        fh.write("#PDBe ID\tchain\tTaxon ID\t"
                 "InterPro accession\tGO ID\tUniProt accession\n")

        for seq in sequences.values():
            for pdb_acc in seq["structures"]:
                try:
                    s = structures[pdb_acc]
                except KeyError:
                    # Structure does not exist in PDBe database
                    continue

                pdb_id = s["id"]
                chain = s["chain"]
                proteins = pdb2uniprot.get(pdb_acc, {''})

                for tax_id in s["taxa"]:
                    for entry_acc in seq["entries"]:
                        for go_id in entries[entry_acc]:
                            for protein_acc in proteins:
                                fh.write(f"{pdb_id}\t{chain}\t{tax_id}\t"
                                         f"{entry_acc}\t{go_id}\t"
                                         f"{protein_acc}\n")

    try:
        os.remove(output)
    except FileNotFoundError:
        pass
    finally:
        os.rename(tmp_path, output)
        os.chmod(output, 0o775)