def dnld_assc(assc_name, go2obj, prt=sys.stdout):
    """Download (if not already present) and read a GO association file.

    The file is requested from http://current.geneontology.org/annotations/
    only when ``assc_name`` does not already exist locally; it is then read
    with ``read_gaf``.

    Args:
        assc_name: Local path of the association file, e.g. "tair.gaf".
            Its basename plus ".gz" is the file name requested remotely.
        go2obj: Container keyed by GO ID (only ``keys()`` is used), or None
            to return the association unfiltered.
        prt: Passed through to ``dnld_file`` and ``read_gaf``.

    Returns:
        The result of ``read_gaf`` when ``go2obj`` is None; otherwise a dict
        mapping each gene to the intersection of its GO IDs with the keys of
        ``go2obj``.
    """
    # The local path is used exactly as given (a bare file name resolves
    # against the current working directory); only the basename is needed,
    # to name the remote file.
    assc_base = os.path.basename(assc_name)
    if not os.path.isfile(assc_name):
        assc_http = "http://current.geneontology.org/annotations/"
        # URLs always use '/', so format the address directly rather than
        # going through the platform-specific os.path.join.
        src = "{URL}{ASSC}.gz".format(URL=assc_http, ASSC=assc_base)
        dnld_file(src, assc_name, prt, loading_bar=None)
    # Read the (possibly just downloaded) association
    assc_orig = read_gaf(assc_name, prt)
    if go2obj is None:
        return assc_orig
    # If a GO DAG is provided, keep only GO IDs present in the GO DAG
    goids_dag = set(go2obj.keys())
    return {gene: goids_cur.intersection(goids_dag)
            for gene, goids_cur in assc_orig.items()}
def dnld_goa(self, species, ext='gaf', item=None, fileout=None):
    """Download a GOA source file from the EMBL-EBI ftp server.

    Args:
        species: Species name; upper-cased to form the remote directory.
        ext: Extension handed to ``self.get_basename``.
        item: Optional item handed to ``self.get_basename``.
        fileout: Destination path. When None, the file is written to the
            current working directory under the name returned by
            ``self.get_basename``.

    Returns:
        The destination path that was passed to ``dnld_file``.
    """
    # Local import: the remote address must always be joined with '/',
    # whereas os.path.join would insert '\\' on Windows.
    import posixpath

    basename = self.get_basename(species, ext, item)
    src = posixpath.join(self.ftp_src_goa, species.upper(), "{F}.gz".format(F=basename))
    # The destination is a local path, so the platform-specific join is correct.
    dst = os.path.join(os.getcwd(), basename) if fileout is None else fileout
    dnld_file(src, dst, prt=sys.stdout, loading_bar=None)
    return dst
def dnld_annotation(assc_file, prt=sys.stdout):
    """Fetch a gaf, gpad, or gpi file from http://current.geneontology.org/annotations/

    Nothing is done when ``assc_file`` already exists locally. Otherwise the
    basename of ``assc_file`` plus ".gz" is requested and handed to
    ``dnld_file`` with ``assc_file`` as the destination.
    """
    if os.path.isfile(assc_file):
        return
    base_url = "http://current.geneontology.org/annotations/"
    remote_name = os.path.basename(assc_file) + ".gz"
    dnld_file(os.path.join(base_url, remote_name), assc_file, prt, loading_bar=None)
def dnld_assc(assc_name, go2obj, prt=sys.stdout):
    """Download association from http://geneontology.org/gene-associations.

    The file is requested only when ``assc_name`` does not already exist
    locally; it is then read with ``read_gaf``.

    Args:
        assc_name: Local path of the association file, e.g.
            "gene_association.tair". Its basename plus ".gz" is the file
            name requested remotely.
        go2obj: Container keyed by GO ID (only ``keys()`` is used), or None
            to return the association unfiltered.
        prt: Passed through to ``dnld_file`` and ``read_gaf``.

    Returns:
        The result of ``read_gaf`` when ``go2obj`` is None; otherwise a dict
        mapping each gene to the intersection of its GO IDs with the keys of
        ``go2obj``.
    """
    # The local path is used exactly as given (a bare file name resolves
    # against the current working directory); only the basename is needed,
    # to name the remote file.
    assc_base = os.path.basename(assc_name)
    if not os.path.isfile(assc_name):
        assc_http = "http://geneontology.org/gene-associations/"
        # URLs always use '/', so format the address directly rather than
        # going through the platform-specific os.path.join.
        src = "{URL}{ASSC}.gz".format(URL=assc_http, ASSC=assc_base)
        dnld_file(src, assc_name, prt, loading_bar=None)
    assc_orig = read_gaf(assc_name, prt)
    # Without a GO DAG there is nothing to filter against; return the
    # association as read instead of failing on ``None.keys()``.
    if go2obj is None:
        return assc_orig
    goids_dag = set(go2obj.keys())
    return {gene: goids_cur.intersection(goids_dag)
            for gene, goids_cur in assc_orig.items()}