Esempio n. 1
0
def downloadBioMarkers():
    url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/Hs_exon_tissue-specific_protein_coding.zip'
    print 'Downloading BioMarker associations'
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/BioMarkers/','')
    
    url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/Mm_gene_tissue-specific_protein_coding.zip'
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/BioMarkers/','')
Esempio n. 2
0
def downloadMiRDatabases(species):
    url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/'+species+'_microRNA-Ensembl-GOElite_strict.txt'
    selected = ['Hs','Mm','Rn'] ### these are simply zipped where the others are not
    ### These files should be updated on a regular basis
    if species in selected:
        url = string.replace(url,'.txt','.zip')
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/microRNATargets/','')
    else:
        ### Where strict is too strict
        url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/'+species+'_microRNA-Ensembl-GOElite_lax.txt'
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/microRNATargets/','')
    fln = string.replace(fln,'.zip','.txt')
    return fln
Esempio n. 3
0
def downloadDiseaseOntologyOBO():
    print 'Downloading Disease Ontology structure and associations'
    
    """ Unfortunately, we have to download versions that are not as frequently updated, since RGDs server
        reliability is poor """
    #url = 'ftp://rgd.mcw.edu/pub/data_release/ontology_obo_files/disease/CTD.obo'
    url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/CTD.obo'
    
    ### Includes congenital and environmental diseases - http://ctdbase.org/detail.go?type=disease&acc=MESH%3aD002318
    fln,status = update.downloadSuppressPrintOuts(url,'OBO/','')
Esempio n. 4
0
def downloadDiseaseOntologyGeneAssociations(selected_species):
    if selected_species == None: sc = []
    else: sc = selected_species
    """ Unfortunately, we have to download versions that are not as frequently updated, since RGDs server
        reliability is poor """
        
    if 'Hs' in sc or len(sc)==0:
        #url = 'ftp://rgd.mcw.edu/pub/data_release/annotated_rgd_objects_by_ontology/homo_genes_do'
        url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/homo_genes_do'
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/Disease/','')

    if 'Mm' in sc or len(sc)==0:
        #url = 'ftp://rgd.mcw.edu/pub/data_release/annotated_rgd_objects_by_ontology/mus_genes_do'
        url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/mus_genes_do'
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/Disease/','')

    if 'Rn' in sc or len(sc)==0:
        #url = 'ftp://rgd.mcw.edu/pub/data_release/annotated_rgd_objects_by_ontology/rattus_genes_do'
        url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/rattus_genes_do'
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/Disease/','')
Esempio n. 5
0
def downloadDomainAssociations(selected_species):
    paths=[]
    if selected_species != None: ### Restrict to selected species only
        current_species_dirs=selected_species
    else:
        current_species_dirs = unique.read_directory('/'+database_dir)
    for species in current_species_dirs:
        url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/Domains/'+species+'_Ensembl-Domain.gz'
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/Domains/','txt')
        if 'Internet' not in status:
            paths.append((species,fln))
    return paths
Esempio n. 6
0
def importUniProtAnnotations(species_db):
    base_url = 'http://www.altanalyze.org/archiveDBs/'
    uniprot_ensembl_db={}
    for species in species_db:
        url = base_url+species+'/custom_annotations.txt'
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/UniProt/'+species+'/','')
        for line in open(fln,'rU').xreadlines():
            data = cleanUpLine(line)
            ens_gene,compartment,function,symbols,full_name,uniprot_name,uniprot_ids,unigene = string.split(data,'\t')
            symbols = string.split(string.replace(symbols,'; Synonyms=',', '),', ')
            uniprot_ensembl_db[species,uniprot_name] = ens_gene
            species_extension = string.split(uniprot_name,'_')[-1]
            full_name = string.split(full_name,';')[0]
            if 'Transcription factor' in full_name:
                symbols.append(string.split(full_name,'Transcription factor ')[-1]) ### Add this additional synonym to symbols
            ### Extend this database out to account for weird names in PAZAR
            for symbol in symbols:
                new_name = string.upper(symbol)+'_'+species_extension
                if new_name not in uniprot_ensembl_db:
                    uniprot_ensembl_db[species,symbol+'_'+species_extension] = ens_gene
                uniprot_ensembl_db[species,string.upper(symbol)] = ens_gene
    return uniprot_ensembl_db
Esempio n. 7
0
def downloadPAZARAssocations():
    base_url = 'http://www.pazar.info/tftargets/'
    filenames = getPAZARFileNames()
    print 'Downloading Transcription Factor to Target associations'
    source = 'raw'
    r = 4; k = -1
    for resource in filenames:
        filename = filenames[resource]
        url = base_url+filename
        start_time = time.time()
        fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/PAZAR/','')
        end_time = time.time()
        if (end_time-start_time)>3: ### Hence the internet connection is very slow (will take forever to get everything)
            downloadPreCompiledPAZAR() ### Just get the compiled symbol data instead
            print '...access to source PAZAR files too slow, getting pre-compiled from genmapp.org'
            source = 'precompiled'
            break
        k+=1
        if r==k:
            k=0
            print '*',
    print ''
    return source
Esempio n. 8
0
def downloadGOSlimOBO():
    url = 'http://www.geneontology.org/GO_slims/goslim_pir.obo'
    #url = 'http://www.geneontology.org/GO_slims/goslim_generic.obo'  ### Missing 
    fln,status = update.downloadSuppressPrintOuts(url,'OBO/','')
Esempio n. 9
0
def downloadPathwayCommons():
    print 'Downloading PathwayCommons associations'
    url = 'http://www.pathwaycommons.org/pc-snapshot/current-release/gsea/by_species/h**o-sapiens-9606-gene-symbol.gmt.zip'
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/PathwayCommons/','')
Esempio n. 10
0
def downloadPhenotypeOntologyGeneAssociations():
    url = 'ftp://ftp.informatics.jax.org/pub/reports/HMD_HumanPhenotype.rpt'
    #url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/HMD_HumanPhenotype.rpt'
    ### Mouse and human gene symbols and gene IDs (use the gene symbols)
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/Pheno/','')
Esempio n. 11
0
def downloadPhenotypeOntologyOBO():
    print 'Downloading Phenotype Ontology structure and associations'
    url = 'ftp://ftp.informatics.jax.org/pub/reports/MPheno_OBO.ontology'
    fln,status = update.downloadSuppressPrintOuts(url,'OBO/','')
Esempio n. 12
0
def downloadKEGGPathways(species):
    print "Integrating KEGG associations for "+species
    url = 'http://www.genmapp.org/go_elite/Databases/KEGG/'+species+'-KEGG_20110518.zip'
    ### This is a fixed date resource since KEGG licensed their material after this date
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/KEGG/','')
Esempio n. 13
0
def downloadAmadeusPredictions():
    url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/symbol-Metazoan-Amadeus.txt'
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/Amadeus/','')
Esempio n. 14
0
def downloadPreCompiledPAZAR():
    """ Downloads the already merged symbol to TF file from PAZAR files """
    url = 'http://www.genmapp.org/go_elite/Databases/ExternalSystems/tf-target.txt'
    fln,status = update.downloadSuppressPrintOuts(url,'BuildDBs/PAZAR/symbol/','')