def downloadBioMarkers():
    """Download the tissue-specific protein-coding BioMarker archives.

    Fetches the Hs exon-level and Mm gene-level zip files from genmapp.org
    into BuildDBs/BioMarkers/. Returns nothing.
    """
    marker_archives = (
        'http://www.genmapp.org/go_elite/Databases/ExternalSystems/Hs_exon_tissue-specific_protein_coding.zip',
        'http://www.genmapp.org/go_elite/Databases/ExternalSystems/Mm_gene_tissue-specific_protein_coding.zip',
    )
    print('Downloading BioMarker associations')
    for archive_url in marker_archives:
        update.downloadSuppressPrintOuts(archive_url,'BuildDBs/BioMarkers/','')
def downloadMiRDatabases(species):
    """Download the microRNA-to-Ensembl target file for `species`.

    Species listed in `zipped_species` get the zipped "strict" file; every
    other species gets the plain-text "lax" file.

    Returns the local filename reported by the downloader, with '.zip'
    replaced by '.txt'.
    """
    target_dir = 'BuildDBs/microRNATargets/'
    zipped_species = ['Hs','Mm','Rn'] ### these are simply zipped where the others are not
    ### These files should be updated on a regular basis
    if species in zipped_species:
        strict_url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/'+species+'_microRNA-Ensembl-GOElite_strict.txt'
        download_url = strict_url.replace('.txt','.zip')
    else:
        ### Where strict is too strict
        download_url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/'+species+'_microRNA-Ensembl-GOElite_lax.txt'
    local_file,status = update.downloadSuppressPrintOuts(download_url,target_dir,'')
    return local_file.replace('.zip','.txt')
def downloadDiseaseOntologyOBO():
    """Download the CTD disease ontology OBO file into OBO/.

    Unfortunately a less frequently updated copy hosted on genmapp.org is
    used, since RGD's server reliability is poor. The RGD original is at
    ftp://rgd.mcw.edu/pub/data_release/ontology_obo_files/disease/CTD.obo
    """
    print('Downloading Disease Ontology structure and associations')
    ### Includes congenital and environmental diseases - http://ctdbase.org/detail.go?type=disease&acc=MESH%3aD002318
    obo_url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/CTD.obo'
    update.downloadSuppressPrintOuts(obo_url,'OBO/','')
def downloadDiseaseOntologyGeneAssociations(selected_species):
    """Download disease-ontology gene association files into BuildDBs/Disease/.

    selected_species: collection of species codes ('Hs', 'Mm', 'Rn') to fetch.
    None or an empty collection means all three are fetched.

    Unfortunately less frequently updated copies hosted on genmapp.org are
    used, since RGD's server reliability is poor. The RGD originals live under
    ftp://rgd.mcw.edu/pub/data_release/annotated_rgd_objects_by_ontology/
    (homo_genes_do, mus_genes_do, rattus_genes_do).
    """
    requested = [] if selected_species is None else selected_species
    ### (species code, mirrored association file), fetched in this order
    association_files = (
        ('Hs','http://www.genmapp.org/go_elite/Databases/ExternalSystems/homo_genes_do'),
        ('Mm','http://www.genmapp.org/go_elite/Databases/ExternalSystems/mus_genes_do'),
        ('Rn','http://www.genmapp.org/go_elite/Databases/ExternalSystems/rattus_genes_do'),
    )
    for species_code,association_url in association_files:
        if species_code in requested or len(requested)==0:
            update.downloadSuppressPrintOuts(association_url,'BuildDBs/Disease/','')
def downloadDomainAssociations(selected_species):
    """Download the Ensembl-Domain file for each species into BuildDBs/Domains/.

    selected_species: species codes to restrict the download to. When None,
    the species are taken from the entries of the database directory.

    Returns a list of (species, local_filename) tuples for every download
    whose status string does not contain 'Internet'.
    """
    if selected_species is not None:
        ### Restrict to selected species only
        species_codes = selected_species
    else:
        species_codes = unique.read_directory('/'+database_dir)

    downloaded = []
    for species in species_codes:
        domain_url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/Domains/'+species+'_Ensembl-Domain.gz'
        local_file,status = update.downloadSuppressPrintOuts(domain_url,'BuildDBs/Domains/','txt')
        if 'Internet' in status:
            ### presumably a connectivity failure message; skip this species
            continue
        downloaded.append((species,local_file))
    return downloaded
def importUniProtAnnotations(species_db):
    """Build a (species, name) -> Ensembl gene lookup from UniProt annotations.

    For each species in `species_db`, downloads that species'
    custom_annotations.txt from the AltAnalyze archive into
    BuildDBs/UniProt/<species>/ and parses it. Every row must contain exactly
    eight tab-separated fields; otherwise the unpacking raises ValueError.

    Returns a dict keyed by (species, name) tuples whose values are Ensembl
    gene IDs. `name` is one of:
      - the UniProt entry name from the file,
      - a symbol/synonym + '_' + the suffix of the UniProt entry name,
      - the upper-cased symbol/synonym.
    """
    base_url = 'http://www.altanalyze.org/archiveDBs/'
    uniprot_ensembl_db = {}
    for species in species_db:
        url = base_url+species+'/custom_annotations.txt'
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/UniProt/'+species+'/','')
        ### Context manager guarantees the annotation file is closed after parsing
        with open(fln,'rU') as annotation_file:
            for line in annotation_file:
                data = cleanUpLine(line)
                ens_gene,compartment,function,symbols,full_name,uniprot_name,uniprot_ids,unigene = data.split('\t')
                symbols = symbols.replace('; Synonyms=',', ').split(', ')
                uniprot_ensembl_db[species,uniprot_name] = ens_gene
                species_extension = uniprot_name.split('_')[-1]
                full_name = full_name.split(';')[0]
                if 'Transcription factor' in full_name:
                    ### Add this additional synonym to symbols
                    symbols.append(full_name.split('Transcription factor ')[-1])
                ### Extend this database out to account for weird names in PAZAR
                for symbol in symbols:
                    new_name = symbol.upper()+'_'+species_extension
                    ### Keys are (species, name) tuples, so the membership test must use
                    ### the tuple too; testing the bare string was always True and let
                    ### aliases clobber entries stored under that UniProt-style name.
                    if (species,new_name) not in uniprot_ensembl_db:
                        uniprot_ensembl_db[species,symbol+'_'+species_extension] = ens_gene
                    uniprot_ensembl_db[species,symbol.upper()] = ens_gene
    return uniprot_ensembl_db
def downloadPAZARAssocations(): base_url = 'http://www.pazar.info/tftargets/' filenames = getPAZARFileNames() print 'Downloading Transcription Factor to Target associations' source = 'raw' r = 4; k = -1 for resource in filenames: filename = filenames[resource] url = base_url+filename start_time = time.time() fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/PAZAR/','') end_time = time.time() if (end_time-start_time)>3: ### Hence the internet connection is very slow (will take forever to get everything) downloadPreCompiledPAZAR() ### Just get the compiled symbol data instead print '...access to source PAZAR files too slow, getting pre-compiled from genmapp.org' source = 'precompiled' break k+=1 if r==k: k=0 print '*', print '' return source
def downloadGOSlimOBO():
    """Download the PIR GO-slim OBO file from geneontology.org into OBO/."""
    ### Alternative, currently missing on the server:
    ### 'http://www.geneontology.org/GO_slims/goslim_generic.obo'
    goslim_url = 'http://www.geneontology.org/GO_slims/goslim_pir.obo'
    update.downloadSuppressPrintOuts(goslim_url,'OBO/','')
def downloadPathwayCommons():
    """Download the human PathwayCommons gene-symbol GMT archive.

    The zip file is stored under BuildDBs/PathwayCommons/. Returns nothing.
    """
    print('Downloading PathwayCommons associations')
    ### The species segment of the path was garbled ('h**o-sapiens'), which cannot
    ### resolve on the server; restored to 'homo-sapiens'.
    url = 'http://www.pathwaycommons.org/pc-snapshot/current-release/gsea/by_species/homo-sapiens-9606-gene-symbol.gmt.zip'
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/PathwayCommons/','')
def downloadPhenotypeOntologyGeneAssociations():
    """Download the HMD_HumanPhenotype report from the JAX FTP site into BuildDBs/Pheno/."""
    ### Mirror: 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/HMD_HumanPhenotype.rpt'
    ### Mouse and human gene symbols and gene IDs (use the gene symbols)
    report_url = 'ftp://ftp.informatics.jax.org/pub/reports/HMD_HumanPhenotype.rpt'
    update.downloadSuppressPrintOuts(report_url,'BuildDBs/Pheno/','')
def downloadPhenotypeOntologyOBO():
    """Download the MPheno_OBO.ontology file from the JAX FTP site into OBO/."""
    print('Downloading Phenotype Ontology structure and associations')
    ontology_url = 'ftp://ftp.informatics.jax.org/pub/reports/MPheno_OBO.ontology'
    update.downloadSuppressPrintOuts(ontology_url,'OBO/','')
def downloadKEGGPathways(species):
    """Download the KEGG association archive for `species` into BuildDBs/KEGG/.

    species: species code used as the filename prefix on the server.
    """
    print("Integrating KEGG associations for "+species)
    ### This is a fixed date resource since KEGG licensed their material after this date
    kegg_url = 'http://www.genmapp.org/go_elite/Databases/KEGG/'+species+'-KEGG_20110518.zip'
    update.downloadSuppressPrintOuts(kegg_url,'BuildDBs/KEGG/','')
def downloadAmadeusPredictions():
    """Download the symbol-Metazoan-Amadeus.txt file into BuildDBs/Amadeus/."""
    amadeus_url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/symbol-Metazoan-Amadeus.txt'
    update.downloadSuppressPrintOuts(amadeus_url,'BuildDBs/Amadeus/','')
def downloadPreCompiledPAZAR():
    """Download the already-merged symbol-to-TF table (tf-target.txt) built
    from the PAZAR files, storing it under BuildDBs/PAZAR/symbol/."""
    precompiled_url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/tf-target.txt'
    update.downloadSuppressPrintOuts(precompiled_url,'BuildDBs/PAZAR/symbol/','')