def pull_unichem():
    """Fetch the latest UniChem cross-reference and structure dumps into the UNICHEM subdirectory."""
    # Resolve the release-specific base URL once before pulling either file.
    target_uc_url: str = get_latest_unichem_url()
    # The XREF dump is kept compressed; the structure dump uses the helper's default handling.
    # (Original bound the return paths to unused locals; the values were never read.)
    pull_via_urllib(target_uc_url, 'UC_XREF.txt.gz', decompress=False, subpath='UNICHEM')
    pull_via_urllib(target_uc_url, 'UC_STRUCTURE.txt.gz', subpath='UNICHEM')
def pull_smpdb():
    """Download the SMPDB pathways archive and unpack it alongside the download."""
    archive_path = pull_via_urllib('http://smpdb.ca/downloads/', 'smpdb_pathways.csv.zip',
                                   decompress=False, subpath='SMPDB')
    target_dir = path.dirname(archive_path)
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)
def pull_mods():
    """Download per-MOD gene-description JSON files from the Alliance, renaming each
    so the filename uses the same prefix as the subdirectory it lands in.

    Iterates the module-level ``mods`` list; ``modmap`` maps each MOD identifier
    to its preferred prefix/subdirectory name.
    """
    for mod in mods:
        subp = modmap[mod]
        origname = pull_via_urllib('https://fms.alliancegenome.org/download/',
                                   f'GENE-DESCRIPTION-JSON_{mod}.json.gz', subpath=subp)
        # For the makefile it's nice if the directory this goes in matches the
        # {mod} token in the filename, and we'd like both to be the prefix names.
        if mod != subp:
            # Only rewrite the basename: a str.replace over the whole path could
            # corrupt a directory component that happens to contain the MOD string.
            dirpart, base = os.path.split(origname)
            newname = os.path.join(dirpart, base.replace(mod, subp))
            os.rename(origname, newname)
def pull_unii():
    """Download and unpack the UNII archives, giving the dated data files stable names for make."""
    targets = [
        ('UNIIs.zip', 'UNII_Names', 'Latest_UNII_Names.txt'),
        ('UNII_Data.zip', 'UNII_Records', 'Latest_UNII_Records.txt'),
    ]
    for archive_name, name_prefix, stable_name in targets:
        archive_path = pull_via_urllib('https://fdasis.nlm.nih.gov/srs/download/srs/',
                                       archive_name, decompress=False, subpath='UNII')
        unii_dir = path.dirname(archive_path)
        with ZipFile(archive_path, 'r') as archive:
            archive.extractall(unii_dir)
        # Each zip unpacks into a readme plus a dated file like "UNII_Names_<date>.txt";
        # rename it to a fixed name so the makefile can depend on it.
        for entry in listdir(unii_dir):
            if entry.startswith(name_prefix):
                rename(path.join(unii_dir, entry), path.join(unii_dir, stable_name))
def pull_omim():
    """Fetch the OMIM mim2gene mapping file into the OMIM subdirectory."""
    pull_via_urllib('https://www.omim.org/static/omim/data/', 'mim2gene.txt',
                    subpath='OMIM', decompress=False)
def pull_orphanet():
    """Fetch the Orphanet English nomenclature pack (left zipped) into the Orphanet subdirectory."""
    pull_via_urllib('http://www.orphadata.org/data/RD-CODE/Packs/',
                    'Orphanet_Nomenclature_Pack_EN.zip',
                    subpath='Orphanet', decompress=False)
def pull_one_uniprotkb(which):
    """Fetch a single UniProtKB FASTA dump (e.g. which='sprot' or 'trembl') into UniProtKB."""
    base_url = 'ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/'
    pull_via_urllib(base_url, f'uniprot_{which}.fasta.gz', subpath='UniProtKB')
def pull_uniprotkb():
    """Fetch the UniProtKB id-mapping file and the sprot/trembl FASTA dumps."""
    pull_via_urllib('ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/',
                    'idmapping.dat.gz', subpath='UniProtKB')
    # Delegate to the single-dataset helper instead of duplicating its URL logic.
    for which in ['sprot', 'trembl']:
        pull_one_uniprotkb(which)
def pull_doid():
    """Fetch the Human Disease Ontology JSON file into the DOID subdirectory."""
    pull_via_urllib('https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/src/ontology/',
                    'doid.json', subpath='DOID', decompress=False)
def pull_drugcentral():
    """Fetch the DrugCentral structures SMILES table into the DRUGCENTRAL subdirectory."""
    # (Original bound the return path to an unused local; the value was never read.)
    pull_via_urllib('https://unmtid-shinyapps.net/download/', 'structures.smiles.tsv',
                    decompress=False, subpath='DRUGCENTRAL')
def pull_ncit():
    """Fetch the NCIt-to-SwissProt mapping into the NCIT subdirectory.

    Currently this is the only NCIt artifact we need, rather than the full thesaurus.
    """
    # The filename had a needless f-string prefix (no placeholders); plain literal now.
    pull_via_urllib('https://evs.nci.nih.gov/ftp1/NCI_Thesaurus/Mappings/',
                    'NCIt-SwissProt_Mapping.txt', subpath='NCIT', decompress=False)
def pull_rhea():
    """Fetch and decompress the Rhea RDF dump into the RHEA subdirectory."""
    # (Original bound the return path to an unused local; the value was never read.)
    pull_via_urllib('https://ftp.expasy.org/databases/rhea/rdf/', 'rhea.rdf.gz',
                    subpath='RHEA', decompress=True)
def pull_hmdb():
    """Download the HMDB metabolites archive and unpack it alongside the download."""
    archive_path = pull_via_urllib('https://hmdb.ca/system/downloads/current/',
                                   'hmdb_metabolites.zip', decompress=False, subpath='HMDB')
    target_dir = path.dirname(archive_path)
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)
def pull_gtopdb_ligands():
    """Fetch the Guide to Pharmacology ligands table into the GTOPDB subdirectory."""
    pull_via_urllib('https://www.guidetopharmacology.org/DATA/', 'ligands.tsv',
                    decompress=False, subpath='GTOPDB')