Beispiel #1
0
def dnld_assc(assc_name, go2obj, prt=sys.stdout):
    """Download an association file if it is missing, read it, and return it.

    The file is fetched from http://current.geneontology.org/annotations/
    only when ``assc_name`` does not already exist locally.

    Args:
        assc_name: Local path of the association file, e.g. "tair.gaf". Its
            base name selects the remote "<base name>.gz" file to download.
        go2obj: Mapping keyed by GO ID (e.g. a GO DAG), or None.
        prt: Stream handed to the download and read helpers.

    Returns:
        The associations returned by ``read_gaf`` when ``go2obj`` is None.
        Otherwise a new dict with the same genes, where each gene's GO IDs
        are restricted to the GO IDs that are keys of ``go2obj``.
    """
    # The local file is always assc_name itself; a bare file name is resolved
    # against the current working directory by the OS, so no explicit
    # directory handling is required.
    assc_locfile = assc_name
    if not os.path.isfile(assc_locfile):
        assc_base = os.path.basename(assc_name)
        # The base URL ends with "/", so concatenation yields the full URL
        assc_http = "http://current.geneontology.org/annotations/"
        src = assc_http + "{ASSC}.gz".format(ASSC=assc_base)
        dnld_file(src, assc_locfile, prt, loading_bar=None)
    # Read the (possibly just downloaded) association
    assc_orig = read_gaf(assc_locfile, prt)
    if go2obj is None:
        return assc_orig
    # If a GO DAG is provided, use only GO IDs present in the GO DAG
    goids_dag = set(go2obj)
    return {gene: goids_cur.intersection(goids_dag)
            for gene, goids_cur in assc_orig.items()}
Beispiel #2
0
 def dnld_goa(self, species, ext='gaf', item=None, fileout=None):
     """Download a GOA source file from the EMBL-EBI ftp server.

     Args:
         species: Species name; its upper-cased form is the remote directory.
         ext: Passed to ``self.get_basename`` to build the file name.
         item: Passed to ``self.get_basename`` to build the file name.
         fileout: Destination path. When None, the file is written to the
             current working directory under the name from ``get_basename``.

     Returns:
         The destination path handed to ``dnld_file``.
     """
     # Local import: URL components must be joined with "/" on every OS,
     # whereas os.path.join uses the host separator ("\\" on Windows).
     import posixpath
     basename = self.get_basename(species, ext, item)
     src = posixpath.join(self.ftp_src_goa, species.upper(), "{F}.gz".format(F=basename))
     # The destination is a real filesystem path, so os.path is correct here
     dst = os.path.join(os.getcwd(), basename) if fileout is None else fileout
     dnld_file(src, dst, prt=sys.stdout, loading_bar=None)
     return dst
Beispiel #3
0
def dnld_annotation(assc_file, prt=sys.stdout):
    """Fetch an annotation file (gaf, gpad, or gpi) unless it exists locally.

    The remote source is "<base name of assc_file>.gz" under
    http://current.geneontology.org/annotations/; it is handed to
    ``dnld_file`` with ``assc_file`` as the destination and ``prt`` as the
    output stream. Returns None.
    """
    # Nothing to do when the file is already on disk
    if os.path.isfile(assc_file):
        return
    # Disabled alternative base URL: http://geneontology.org/gene-associations/
    base_url = "http://current.geneontology.org/annotations/"
    gz_name = "{ASSC}.gz".format(ASSC=os.path.basename(assc_file))
    dnld_file(os.path.join(base_url, gz_name), assc_file, prt, loading_bar=None)
Beispiel #4
0
def dnld_annotation(assc_file, prt=sys.stdout):
    """Download a gaf, gpad, or gpi annotation file if it is not present.

    When ``assc_file`` is not an existing file, the gzipped file named after
    its base name is requested from
    http://current.geneontology.org/annotations/ through ``dnld_file``,
    with ``assc_file`` as the destination. An existing file is left alone.
    """
    already_local = os.path.isfile(assc_file)
    if not already_local:
        # Disabled alternative: "http://geneontology.org/gene-associations/"
        remote_dir = "http://current.geneontology.org/annotations/"
        local_name = os.path.split(assc_file)[1]
        remote_name = "{ASSC}.gz".format(ASSC=local_name)
        remote_url = os.path.join(remote_dir, remote_name)
        dnld_file(remote_url, assc_file, prt, loading_bar=None)
Beispiel #5
0
def dnld_assc(assc_name, go2obj, prt=sys.stdout):
    """Download an association file if it is missing, read it, and return it.

    The file is fetched from http://geneontology.org/gene-associations/
    only when ``assc_name`` does not already exist locally.

    Args:
        assc_name: Local path of the association file, e.g.
            "gene_association.tair". Its base name selects the remote
            "<base name>.gz" file to download.
        go2obj: Mapping keyed by GO ID (e.g. a GO DAG), or None.
        prt: Stream handed to the download and read helpers.

    Returns:
        The associations returned by ``read_gaf`` when ``go2obj`` is None.
        Otherwise a new dict with the same genes, where each gene's GO IDs
        are restricted to the GO IDs that are keys of ``go2obj``.
    """
    # The local file is always assc_name itself; a bare file name is resolved
    # against the current working directory by the OS, so no explicit
    # directory handling is required.
    assc_local = assc_name
    if not os.path.isfile(assc_local):
        assc_base = os.path.basename(assc_name)
        # The base URL ends with "/", so concatenation yields the full URL
        assc_http = "http://geneontology.org/gene-associations/"
        src = assc_http + "{ASSC}.gz".format(ASSC=assc_base)
        dnld_file(src, assc_local, prt, loading_bar=None)
    assc_orig = read_gaf(assc_local, prt)
    # Without a GO DAG there is nothing to filter against; return as read
    # instead of failing on go2obj.keys()
    if go2obj is None:
        return assc_orig
    # Keep only GO IDs that are present in the GO DAG
    goids_dag = set(go2obj)
    return {gene: goids_cur.intersection(goids_dag)
            for gene, goids_cur in assc_orig.items()}