def get_child_tables(cur: Cursor, schema: str, name: str) -> List[tuple]: cur.execute( """ SELECT TABLE_NAME, CONSTRAINT_NAME, COLUMN_NAME FROM ALL_CONS_COLUMNS WHERE OWNER = :schema AND CONSTRAINT_NAME IN ( SELECT CONSTRAINT_NAME FROM ALL_CONSTRAINTS WHERE CONSTRAINT_TYPE = 'R' -- Referential AND R_CONSTRAINT_NAME IN ( SELECT CONSTRAINT_NAME FROM ALL_CONSTRAINTS WHERE CONSTRAINT_TYPE IN ('P', 'U') -- Primary/Unique AND OWNER = :schema AND TABLE_NAME = :name ) ) """, dict(schema=schema, name=name)) return cur.fetchall()
def fetchCityData(self, con: cx_Oracle.Connection, cur: cx_Oracle.Cursor, startTime: dt.datetime, endTime: dt.datetime, tag: str) -> pd.core.frame.DataFrame: """fethc weather data of all station of a particular entity Args: con (cx_Oracle.Connection): [connection] cur (cx_Oracle.Cursor): [cursor] startTime (dt.datetime): [startime] endTime (dt.datetime): [endTIme] tag (str): [entityTag] Returns: pd.core.frame.DataFrame: [return weather dataframe of all station of a particular entity] """ guj, mp, mah, chh, goa, dd, dnh = False, False, False, False, False, False, False if tag == "WRLDCMP.SCADA1.A0047000": guj, mp, mah, chh, goa, dd, dnh = True, True, True, True, True, True, True elif tag == "WRLDCMP.SCADA1.A0046957": guj = True elif tag == "WRLDCMP.SCADA1.A0046978": mp = True elif tag == "WRLDCMP.SCADA1.A0046980": mah = True elif tag == "WRLDCMP.SCADA1.A0046945": chh = True elif tag == "WRLDCMP.SCADA1.A0046962": goa = True elif tag == "WRLDCMP.SCADA1.A0046948": dd = True elif tag == "WRLDCMP.SCADA1.A0046953": dnh = True try: df2 = pd.DataFrame() if guj: cur.execute( '''select time,temperature from solar where city='station_1' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_1']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_2' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_2']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_3' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_3']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_4' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_4']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_5' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_5']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_6' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_6']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_7' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_7']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_8' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_8']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_9' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_9']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) if mp: cur.execute( '''select time,temperature from solar where city='station_10' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_10']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_11' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_11']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_12' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_12']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_13' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_13']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_14' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_14']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_15' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_15']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_16' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_16']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) if mah: cur.execute( '''select time,temperature from solar where city='station_17' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_17']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_18' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_18']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_19' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_19']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_20' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_20']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_21' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_21']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_22' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_22']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_23' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_23']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_24' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_24']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_25' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_25']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_26' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_26']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_27' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_27']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_28' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_28']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_29' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_29']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_30' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_30']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_31' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_31']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_32' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_32']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_33' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_33']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_34' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_34']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_35' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_35']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_36' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_36']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_37' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_37']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) if chh: cur.execute( '''select time,temperature from solar where city='station_38' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_38']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_39' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_39']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_40' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_40']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) if goa: cur.execute( '''select time,temperature from solar where city='station_41' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_41']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) if dd: cur.execute( '''select time,temperature from solar where city='station_42' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_42']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) cur.execute( '''select time,temperature from solar where city='station_43' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_43']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) if dnh: cur.execute( '''select time,temperature from solar where city='station_44' and (time between :1 and :2 ) order by time''', (startTime, endTime)) df = pd.DataFrame(cur.fetchall(), columns=['time', 'station_44']) df.set_index('time', inplace=True) df2 = pd.concat([df2, df], axis=1) # print(df2) except Exception as e: print(e) return pd.DataFrame() return df2
def _export_pdb2interpro2go2uniprot(cur: cx_Oracle.Cursor, output: str): # PDBe sequences from UniParc cur.execute( """ SELECT UPI, AC FROM UNIPARC.XREF WHERE DBID = 21 AND DELETED = 'N' """ ) sequences = {} for upi, pdb_acc in cur: if upi in sequences: sequences[upi]["structures"].add(pdb_acc) else: sequences[upi] = { "structures": {pdb_acc}, "entries": set() } # Integrated signatures whose entry is checked and has GO terms cur.execute( """ SELECT METHOD_AC, ENTRY_AC FROM INTERPRO.ENTRY2METHOD WHERE ENTRY_AC IN ( SELECT ENTRY_AC FROM INTERPRO.ENTRY WHERE CHECKED = 'Y' INTERSECT SELECT DISTINCT ENTRY_AC FROM INTERPRO.INTERPRO2GO ) """ ) signatures = dict(cur.fetchall()) # GO terms in InterPro cur.execute( """ SELECT ENTRY_AC, GO_ID FROM INTERPRO.INTERPRO2GO WHERE ENTRY_AC IN ( SELECT ENTRY_AC FROM INTERPRO.ENTRY WHERE CHECKED = 'Y' ) """ ) entries = {} for entry_acc, go_id in cur: if entry_acc in entries: entries[entry_acc].add(go_id) else: entries[entry_acc] = {go_id} # PDBe matches cur.execute( """ SELECT DISTINCT UPI, METHOD_AC FROM IPRSCAN.MV_IPRSCAN WHERE UPI IN ( SELECT UPI FROM UNIPARC.XREF WHERE DBID = 21 AND DELETED = 'N' ) """ ) for upi, signature_acc in cur: try: entry_acc = signatures[signature_acc] except KeyError: pass else: sequences[upi]["entries"].add(entry_acc) # PDBe taxonomy structures = pdbe.get_chain_taxonomy(cur) # UniProt accessions cur.execute( """ SELECT DISTINCT A.AC, B.AC FROM UNIPARC.XREF A LEFT OUTER JOIN UNIPARC.XREF B ON A.UPI = B.UPI WHERE A.DBID = 21 AND A.DELETED = 'N' AND B.DBID IN (2, 3) AND B.DELETED = 'N' """ ) pdb2uniprot = {} for pdb_acc, protein_acc in cur: if not protein_acc: continue elif pdb_acc in pdb2uniprot: pdb2uniprot[pdb_acc].add(protein_acc) else: pdb2uniprot[pdb_acc] = {protein_acc} tmp_path = f"{output}.tmp" with open(tmp_path, "wt") as fh: fh.write("#PDBe ID\tchain\tTaxon ID\t" "InterPro accession\tGO ID\tUniProt accession\n") for seq in sequences.values(): for pdb_acc in seq["structures"]: try: s = structures[pdb_acc] except KeyError: # Structure does not exist in PDBe database continue pdb_id = s["id"] chain = s["chain"] proteins = pdb2uniprot.get(pdb_acc, {''}) for tax_id in s["taxa"]: for entry_acc in seq["entries"]: for go_id in entries[entry_acc]: for protein_acc in proteins: fh.write(f"{pdb_id}\t{chain}\t{tax_id}\t" f"{entry_acc}\t{go_id}\t" f"{protein_acc}\n") try: os.remove(output) except FileNotFoundError: pass finally: os.rename(tmp_path, output) os.chmod(output, 0o775)